{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入使用的python模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from IPython.display import display\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>java开发工程师</td>\n",
       "      <td>支付,Java</td>\n",
       "      <td>18k-30k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验5-10年</td>\n",
       "      <td>汇付</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>上海·虹梅路</td>\n",
       "      <td>团队好,绩效奖金,环境好,氛围好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>高级Java开发工程师</td>\n",
       "      <td>大数据,智能硬件,平台,MySQL,Java,后端</td>\n",
       "      <td>20k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>涂鸦智能</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>杭州·西溪</td>\n",
       "      <td>福利好、定期团建、环境氛围好</td>\n",
       "      <td>移动互联网,硬件</td>\n",
       "      <td>C轮</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job                     jobtag   salary edu      exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技           Java,Android,IOS  20k-40k  本科   经验3-5年   \n",
       "1  2          高级Java工程师               后端,Java,服务器端  25k-35k  本科   经验3-5年   \n",
       "2  3          Java开发工程师                       Java  20k-40k  本科   经验3-5年   \n",
       "3  4          java开发工程师                    支付,Java  18k-30k  本科  经验5-10年   \n",
       "4  5        高级Java开发工程师  大数据,智能硬件,平台,MySQL,Java,后端  20k-35k  本科   经验3-5年   \n",
       "\n",
       "       company     pubdate address              benefits   comtype finance  \\\n",
       "0  阿拉丁控股集团有限公司  2019-08-21  北京·大望路             五险一金，周末双休        金融   不需要融资   \n",
       "1         小帮规划  2019-08-16   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮   \n",
       "2           玩吧  2019-08-20  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮   \n",
       "3           汇付  2019-08-21  上海·虹梅路      团队好,绩效奖金,环境好,氛围好  移动互联网,金融    上市公司   \n",
       "4         涂鸦智能  2019-08-21   杭州·西溪        福利好、定期团建、环境氛围好  移动互联网,硬件      C轮   \n",
       "\n",
       "     comsize  \n",
       "0   150-500人  \n",
       "1   150-500人  \n",
       "2   150-500人  \n",
       "3  500-2000人  \n",
       "4  500-2000人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>81048</th>\n",
       "      <td>80637</td>\n",
       "      <td>销售</td>\n",
       "      <td>医疗健康,通信/网络设备,大客户销售,客户代表</td>\n",
       "      <td>10k-20k</td>\n",
       "      <td>大专</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>深圳市艾唯尔科技有限公司</td>\n",
       "      <td>2019-08-23</td>\n",
       "      <td>深圳·龙岗区</td>\n",
       "      <td>五险一金、双休、提成、带薪年假</td>\n",
       "      <td>消费生活,硬件</td>\n",
       "      <td>A轮</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85288</th>\n",
       "      <td>84830</td>\n",
       "      <td>行政前台</td>\n",
       "      <td>前台</td>\n",
       "      <td>3k-4k</td>\n",
       "      <td>大专</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>荣耀网络</td>\n",
       "      <td>2019-08-22</td>\n",
       "      <td>广州·海珠区</td>\n",
       "      <td>奖金丰富、免费午晚餐提供、团建活动</td>\n",
       "      <td>移动互联网,游戏</td>\n",
       "      <td>天使轮</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57162</th>\n",
       "      <td>56954</td>\n",
       "      <td>运营总监</td>\n",
       "      <td>云计算</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验5-10年</td>\n",
       "      <td>性能魔方</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>上海·南京东路</td>\n",
       "      <td>公司高速发展中</td>\n",
       "      <td>数据服务</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          id   job                   jobtag   salary edu      exp  \\\n",
       "81048  80637    销售  医疗健康,通信/网络设备,大客户销售,客户代表  10k-20k  大专   经验3-5年   \n",
       "85288  84830  行政前台                       前台    3k-4k  大专     经验不限   \n",
       "57162  56954  运营总监                      云计算  20k-40k  本科  经验5-10年   \n",
       "\n",
       "            company     pubdate  address           benefits   comtype finance  \\\n",
       "81048  深圳市艾唯尔科技有限公司  2019-08-23   深圳·龙岗区    五险一金、双休、提成、带薪年假   消费生活,硬件      A轮   \n",
       "85288          荣耀网络  2019-08-22   广州·海珠区  奖金丰富、免费午晚餐提供、团建活动  移动互联网,游戏     天使轮   \n",
       "57162          性能魔方  2019-08-21  上海·南京东路            公司高速发展中      数据服务    上市公司   \n",
       "\n",
       "       comsize  \n",
       "81048  50-150人  \n",
       "85288  50-150人  \n",
       "57162  50-150人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>102640</th>\n",
       "      <td>102090</td>\n",
       "      <td>游戏策划</td>\n",
       "      <td>游戏</td>\n",
       "      <td>8k-16k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>放学后教育</td>\n",
       "      <td>2019-08-17</td>\n",
       "      <td>深圳·南山区</td>\n",
       "      <td>期权分红，看过就不想走的办公氛围和环境</td>\n",
       "      <td>教育,游戏</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>少于15人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102641</th>\n",
       "      <td>102091</td>\n",
       "      <td>游戏策划</td>\n",
       "      <td>专项奖金,五险一金,通讯津贴,交通补助</td>\n",
       "      <td>10k-20k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>盛天网络</td>\n",
       "      <td>2019-08-19</td>\n",
       "      <td>武汉·东湖新技术开发区</td>\n",
       "      <td>上市公司，发展空间大，福利完善</td>\n",
       "      <td>游戏,文娱丨内容</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102642</th>\n",
       "      <td>102092</td>\n",
       "      <td>游戏策划</td>\n",
       "      <td>带薪年假,定期体检,弹性工作,年度旅游</td>\n",
       "      <td>5k-8k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验1-3年</td>\n",
       "      <td>碧游</td>\n",
       "      <td>2019-08-19</td>\n",
       "      <td>杭州·文三路</td>\n",
       "      <td>弹性工作时间 扁平管理</td>\n",
       "      <td>游戏</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            id   job               jobtag   salary edu     exp company  \\\n",
       "102640  102090  游戏策划                   游戏   8k-16k  不限    经验不限   放学后教育   \n",
       "102641  102091  游戏策划  专项奖金,五险一金,通讯津贴,交通补助  10k-20k  不限  经验3-5年    盛天网络   \n",
       "102642  102092  游戏策划  带薪年假,定期体检,弹性工作,年度旅游    5k-8k  本科  经验1-3年      碧游   \n",
       "\n",
       "           pubdate      address             benefits   comtype finance  \\\n",
       "102640  2019-08-17       深圳·南山区  期权分红，看过就不想走的办公氛围和环境     教育,游戏   不需要融资   \n",
       "102641  2019-08-19  武汉·东湖新技术开发区      上市公司，发展空间大，福利完善  游戏,文娱丨内容    上市公司   \n",
       "102642  2019-08-19       杭州·文三路          弹性工作时间 扁平管理        游戏   不需要融资   \n",
       "\n",
       "          comsize  \n",
       "102640      少于15人  \n",
       "102641  500-2000人  \n",
       "102642    50-150人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lagou = pd.read_csv('./dataset/lagou_all.csv',sep = '^',names = [\"id\", \"job\", \"jobtag\", \"salary\", \n",
    "                                                                 \"edu\", \"exp\", \"company\", \"pubdate\",\n",
    "                                                                 \"address\", \"benefits\", \"comtype\", \"finance\", \n",
    "                                                                 \"comsize\"])\n",
    "display(lagou.head())\n",
    "\n",
    "# 随机抽取3行\n",
    "display(lagou.sample(3))\n",
    "\n",
    "# 后三行\n",
    "display(lagou.tail(3))\n",
    "\n",
    "# 显示所有列\n",
    "pd.set_option('display.max_columns',None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 查看dataframe的属性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RangeIndex(start=0, stop=102643, step=1)\n",
      "Index(['id', 'job', 'jobtag', 'salary', 'edu', 'exp', 'company', 'pubdate',\n",
      "       'address', 'benefits', 'comtype', 'finance', 'comsize'],\n",
      "      dtype='object')\n",
      "[['1' 'JAVA高级研发工程师—阿拉丁科技' 'Java,Android,IOS' ... '金融' '不需要融资' '150-500人']\n",
      " ['2' '高级Java工程师' '后端,Java,服务器端' ... '移动互联网,金融' 'B轮' '150-500人']\n",
      " ['3' 'Java开发工程师' 'Java' ... '社交' 'B轮' '150-500人']\n",
      " ...\n",
      " ['102090' '游戏策划' '游戏' ... '教育,游戏' '不需要融资' '少于15人']\n",
      " ['102091' '游戏策划' '专项奖金,五险一金,通讯津贴,交通补助' ... '游戏,文娱丨内容' '上市公司' '500-2000人']\n",
      " ['102092' '游戏策划' '带薪年假,定期体检,弹性工作,年度旅游' ... '游戏' '不需要融资' '50-150人']]\n"
     ]
    }
   ],
   "source": [
    "print(lagou.index)      # 查看索引信息 \n",
    "print(lagou.columns)    # 查看表头\n",
    "print(lagou.values)     # 查看数据列表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(102643, 13)\n",
      "2\n",
      "id          object\n",
      "job         object\n",
      "jobtag      object\n",
      "salary      object\n",
      "edu         object\n",
      "exp         object\n",
      "company     object\n",
      "pubdate     object\n",
      "address     object\n",
      "benefits    object\n",
      "comtype     object\n",
      "finance     object\n",
      "comsize     object\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(lagou.shape)  # 返回几行几列\n",
    "print(lagou.ndim)   # 返回维度\n",
    "print(lagou.dtypes) # 返回各列的类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n",
      "None\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>columnsname</th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>indexname</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "columnsname id                job            jobtag   salary edu     exp  \\\n",
       "indexname                                                                  \n",
       "0            1  JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20k-40k  本科  经验3-5年   \n",
       "1            2          高级Java工程师      后端,Java,服务器端  25k-35k  本科  经验3-5年   \n",
       "2            3          Java开发工程师              Java  20k-40k  本科  经验3-5年   \n",
       "\n",
       "columnsname      company     pubdate address              benefits   comtype  \\\n",
       "indexname                                                                      \n",
       "0            阿拉丁控股集团有限公司  2019-08-21  北京·大望路             五险一金，周末双休        金融   \n",
       "1                   小帮规划  2019-08-16   北京·望京           牛人多，氛围好，福利好  移动互联网,金融   \n",
       "2                     玩吧  2019-08-20  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交   \n",
       "\n",
       "columnsname finance   comsize  \n",
       "indexname                      \n",
       "0             不需要融资  150-500人  \n",
       "1                B轮  150-500人  \n",
       "2                B轮  150-500人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(lagou.index.name)\n",
    "print(lagou.columns.name)\n",
    "lagou.index.name = 'indexname'\n",
    "lagou.columns.name = 'columnsname'\n",
    "display(lagou.head(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看数据信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>columnsname</th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>102643</td>\n",
       "      <td>102444</td>\n",
       "      <td>102201</td>\n",
       "      <td>102444</td>\n",
       "      <td>102062</td>\n",
       "      <td>102062</td>\n",
       "      <td>102062</td>\n",
       "      <td>102062</td>\n",
       "      <td>102062</td>\n",
       "      <td>102062</td>\n",
       "      <td>101680</td>\n",
       "      <td>101680</td>\n",
       "      <td>101680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>102395</td>\n",
       "      <td>26784</td>\n",
       "      <td>35266</td>\n",
       "      <td>473</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>17722</td>\n",
       "      <td>37</td>\n",
       "      <td>2405</td>\n",
       "      <td>37458</td>\n",
       "      <td>677</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>多劳多得，不劳不得</td>\n",
       "      <td>产品经理</td>\n",
       "      <td>游戏</td>\n",
       "      <td>10k-20k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>2019-08-22</td>\n",
       "      <td>深圳·南山区</td>\n",
       "      <td>五险一金</td>\n",
       "      <td>移动互联网</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>18</td>\n",
       "      <td>1228</td>\n",
       "      <td>1147</td>\n",
       "      <td>7121</td>\n",
       "      <td>59533</td>\n",
       "      <td>35339</td>\n",
       "      <td>929</td>\n",
       "      <td>36843</td>\n",
       "      <td>4054</td>\n",
       "      <td>677</td>\n",
       "      <td>10201</td>\n",
       "      <td>29808</td>\n",
       "      <td>25564</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "columnsname         id     job  jobtag   salary     edu     exp company  \\\n",
       "count           102643  102444  102201   102444  102062  102062  102062   \n",
       "unique          102395   26784   35266      473       5       7   17722   \n",
       "top          多劳多得，不劳不得    产品经理      游戏  10k-20k      本科  经验3-5年    字节跳动   \n",
       "freq                18    1228    1147     7121   59533   35339     929   \n",
       "\n",
       "columnsname     pubdate address benefits comtype finance   comsize  \n",
       "count            102062  102062   102062  101680  101680    101680  \n",
       "unique               37    2405    37458     677       9        10  \n",
       "top          2019-08-22  深圳·南山区     五险一金   移动互联网   不需要融资  150-500人  \n",
       "freq              36843    4054      677   10201   29808     25564  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "count     102201\n",
      "unique     35266\n",
      "top           游戏\n",
      "freq        1147\n",
      "Name: jobtag, dtype: object\n"
     ]
    }
   ],
   "source": [
    "display(lagou.describe())\n",
    "print(lagou['jobtag'].describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 102643 entries, 0 to 102642\n",
      "Data columns (total 13 columns):\n",
      " #   Column    Non-Null Count   Dtype \n",
      "---  ------    --------------   ----- \n",
      " 0   id        102643 non-null  object\n",
      " 1   job       102444 non-null  object\n",
      " 2   jobtag    102201 non-null  object\n",
      " 3   salary    102444 non-null  object\n",
      " 4   edu       102062 non-null  object\n",
      " 5   exp       102062 non-null  object\n",
      " 6   company   102062 non-null  object\n",
      " 7   pubdate   102062 non-null  object\n",
      " 8   address   102062 non-null  object\n",
      " 9   benefits  102062 non-null  object\n",
      " 10  comtype   101680 non-null  object\n",
      " 11  finance   101680 non-null  object\n",
      " 12  comsize   101680 non-null  object\n",
      "dtypes: object(13)\n",
      "memory usage: 10.2+ MB\n"
     ]
    }
   ],
   "source": [
    "# 检测缺失值\n",
    "lagou.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "columnsname\n",
       "id            0\n",
       "job         199\n",
       "jobtag      442\n",
       "salary      199\n",
       "edu         581\n",
       "exp         581\n",
       "company     581\n",
       "pubdate     581\n",
       "address     581\n",
       "benefits    581\n",
       "comtype     963\n",
       "finance     963\n",
       "comsize     963\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看每个字段各有多少个空值\n",
    "lagou.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看单独的日期列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "indexname\n",
       "0    2019-08-21\n",
       "1    2019-08-16\n",
       "2    2019-08-20\n",
       "3    2019-08-21\n",
       "4    2019-08-21\n",
       "5    2019-08-13\n",
       "6    2019-08-20\n",
       "7    2019-08-21\n",
       "8    2019-08-21\n",
       "9    2019-08-21\n",
       "Name: pubdate, dtype: object"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lagou['pubdate'].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "indexname\n",
       "0    JAVA高级研发工程师—阿拉丁科技\n",
       "1            高级Java工程师\n",
       "2            Java开发工程师\n",
       "3            java开发工程师\n",
       "4          高级Java开发工程师\n",
       "5            Java开发工程师\n",
       "6            Java软件工程师\n",
       "7      Java研发工程师（财务方向）\n",
       "8              Java工程师\n",
       "9            java开发工程师\n",
       "Name: job, dtype: object"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lagou.job.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据预处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 重新加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20k-40k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>java开发工程师</td>\n",
       "      <td>支付,Java</td>\n",
       "      <td>18k-30k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验5-10年</td>\n",
       "      <td>汇付</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>上海·虹梅路</td>\n",
       "      <td>团队好,绩效奖金,环境好,氛围好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>高级Java开发工程师</td>\n",
       "      <td>大数据,智能硬件,平台,MySQL,Java,后端</td>\n",
       "      <td>20k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>涂鸦智能</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>杭州·西溪</td>\n",
       "      <td>福利好、定期团建、环境氛围好</td>\n",
       "      <td>移动互联网,硬件</td>\n",
       "      <td>C轮</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job                     jobtag   salary edu      exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技           Java,Android,IOS  20k-40k  本科   经验3-5年   \n",
       "1  2          高级Java工程师               后端,Java,服务器端  25k-35k  本科   经验3-5年   \n",
       "2  3          Java开发工程师                       Java  20k-40k  本科   经验3-5年   \n",
       "3  4          java开发工程师                    支付,Java  18k-30k  本科  经验5-10年   \n",
       "4  5        高级Java开发工程师  大数据,智能硬件,平台,MySQL,Java,后端  20k-35k  本科   经验3-5年   \n",
       "\n",
       "       company     pubdate address              benefits   comtype finance  \\\n",
       "0  阿拉丁控股集团有限公司  2019-08-21  北京·大望路             五险一金，周末双休        金融   不需要融资   \n",
       "1         小帮规划  2019-08-16   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮   \n",
       "2           玩吧  2019-08-20  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮   \n",
       "3           汇付  2019-08-21  上海·虹梅路      团队好,绩效奖金,环境好,氛围好  移动互联网,金融    上市公司   \n",
       "4         涂鸦智能  2019-08-21   杭州·西溪        福利好、定期团建、环境氛围好  移动互联网,硬件      C轮   \n",
       "\n",
       "     comsize  \n",
       "0   150-500人  \n",
       "1   150-500人  \n",
       "2   150-500人  \n",
       "3  500-2000人  \n",
       "4  500-2000人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Read the '^'-delimited file; column names are supplied here rather than taken from the file.\n",
    "lagou = pd.read_csv(\n",
    "    'lagou_all.csv',\n",
    "    sep='^',\n",
    "    names=[\n",
    "        'id', 'job', 'jobtag', 'salary', 'edu', 'exp', 'company',\n",
    "        'pubdate', 'address', 'benefits', 'comtype', 'finance', 'comsize',\n",
    "    ],\n",
    ")\n",
    "# Preview the first five rows.\n",
    "display(lagou.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看数据信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 102643 entries, 0 to 102642\n",
      "Data columns (total 13 columns):\n",
      " #   Column    Non-Null Count   Dtype \n",
      "---  ------    --------------   ----- \n",
      " 0   id        102643 non-null  object\n",
      " 1   job       102444 non-null  object\n",
      " 2   jobtag    102201 non-null  object\n",
      " 3   salary    102444 non-null  object\n",
      " 4   edu       102062 non-null  object\n",
      " 5   exp       102062 non-null  object\n",
      " 6   company   102062 non-null  object\n",
      " 7   pubdate   102062 non-null  object\n",
      " 8   address   102062 non-null  object\n",
      " 9   benefits  102062 non-null  object\n",
      " 10  comtype   101680 non-null  object\n",
      " 11  finance   101680 non-null  object\n",
      " 12  comsize   101680 non-null  object\n",
      "dtypes: object(13)\n",
      "memory usage: 10.2+ MB\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "None"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# info() prints its summary itself and returns None, so wrapping it in display() only adds a stray `None` output.\n",
    "# The per-column non-null counts show which columns contain missing values.\n",
    "lagou.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 去除重复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "102643\n"
     ]
    }
   ],
   "source": [
    "# Number of rows currently in the frame.\n",
    "print(lagou.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "102424\n"
     ]
    }
   ],
   "source": [
    "# Remove rows that are exact duplicates of an earlier row (in place), then report how many rows remain.\n",
    "lagou.drop_duplicates(inplace=True)\n",
    "print(lagou.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 找出带空值的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>85</td>\n",
       "      <td>java</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15k-30k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>中森顺和</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·朝阳区</td>\n",
       "      <td>15k-30K 有能力者工资面谈</td>\n",
       "      <td>区块链</td>\n",
       "      <td>未融资</td>\n",
       "      <td>15-50人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>420</th>\n",
       "      <td>421</td>\n",
       "      <td>Java</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7k-14k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>信雅达</td>\n",
       "      <td>2019-08-22</td>\n",
       "      <td>郑州·郑东新区</td>\n",
       "      <td>五险一金,双休,餐补</td>\n",
       "      <td>移动互联网</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>2000人以上</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>500</td>\n",
       "      <td>c++</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8k-10k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验1-3年</td>\n",
       "      <td>成都锦运达科技有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>成都·温江区</td>\n",
       "      <td>团建，拓展活动，五险一金，年底奖金</td>\n",
       "      <td>硬件,移动互联网</td>\n",
       "      <td>未融资</td>\n",
       "      <td>15-50人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>972</td>\n",
       "      <td>php</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8k-10k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>思酷</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>苏州·松陵</td>\n",
       "      <td>高速成长，精英团队，工作稳定</td>\n",
       "      <td>移动互联网,电商</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1658</th>\n",
       "      <td>1659</td>\n",
       "      <td>数据挖掘</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15k-25k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>银管家</td>\n",
       "      <td>2019-08-19</td>\n",
       "      <td>深圳·南头</td>\n",
       "      <td>五险一金 带薪年假 领导好 发展空间大</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102503</th>\n",
       "      <td>3、工作弹性大</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>2000人以上</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102503</th>\n",
       "      <td>3、工作弹性大</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>2000人以上</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102572</th>\n",
       "      <td>102023</td>\n",
       "      <td>在线销售</td>\n",
       "      <td>教育</td>\n",
       "      <td>3k-6k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>伍公分创新设计顾问</td>\n",
       "      <td>2019-08-23</td>\n",
       "      <td>上海·黄浦区</td>\n",
       "      <td>每日手机线上办公</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102572</th>\n",
       "      <td>102023</td>\n",
       "      <td>在线销售</td>\n",
       "      <td>教育</td>\n",
       "      <td>3k-6k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>伍公分创新设计顾问</td>\n",
       "      <td>2019-08-23</td>\n",
       "      <td>上海·黄浦区</td>\n",
       "      <td>每日手机线上办公</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102572</th>\n",
       "      <td>102023</td>\n",
       "      <td>在线销售</td>\n",
       "      <td>教育</td>\n",
       "      <td>3k-6k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>伍公分创新设计顾问</td>\n",
       "      <td>2019-08-23</td>\n",
       "      <td>上海·黄浦区</td>\n",
       "      <td>每日手机线上办公</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4986 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             id   job jobtag   salary  edu     exp      company     pubdate  \\\n",
       "84           85  java    NaN  15k-30k   本科  经验3-5年         中森顺和  2019-08-21   \n",
       "420         421  Java    NaN   7k-14k   本科  经验3-5年          信雅达  2019-08-22   \n",
       "499         500   c++    NaN   8k-10k   本科  经验1-3年  成都锦运达科技有限公司  2019-08-21   \n",
       "971         972   php    NaN   8k-10k   本科  经验3-5年           思酷  2019-08-20   \n",
       "1658       1659  数据挖掘    NaN  15k-25k   本科  经验3-5年          银管家  2019-08-19   \n",
       "...         ...   ...    ...      ...  ...     ...          ...         ...   \n",
       "102503  3、工作弹性大    金融  不需要融资  2000人以上  NaN     NaN          NaN         NaN   \n",
       "102503  3、工作弹性大    金融  不需要融资  2000人以上  NaN     NaN          NaN         NaN   \n",
       "102572   102023  在线销售     教育    3k-6k   不限    经验不限    伍公分创新设计顾问  2019-08-23   \n",
       "102572   102023  在线销售     教育    3k-6k   不限    经验不限    伍公分创新设计顾问  2019-08-23   \n",
       "102572   102023  在线销售     教育    3k-6k   不限    经验不限    伍公分创新设计顾问  2019-08-23   \n",
       "\n",
       "        address             benefits   comtype finance  comsize  \n",
       "84       北京·朝阳区     15k-30K 有能力者工资面谈       区块链     未融资   15-50人  \n",
       "420     郑州·郑东新区           五险一金,双休,餐补     移动互联网    上市公司  2000人以上  \n",
       "499      成都·温江区    团建，拓展活动，五险一金，年底奖金  硬件,移动互联网     未融资   15-50人  \n",
       "971       苏州·松陵       高速成长，精英团队，工作稳定  移动互联网,电商   不需要融资  50-150人  \n",
       "1658      深圳·南头  五险一金 带薪年假 领导好 发展空间大  移动互联网,金融   不需要融资  50-150人  \n",
       "...         ...                  ...       ...     ...      ...  \n",
       "102503      NaN                  NaN       NaN     NaN      NaN  \n",
       "102503      NaN                  NaN       NaN     NaN      NaN  \n",
       "102572   上海·黄浦区             每日手机线上办公       NaN     NaN      NaN  \n",
       "102572   上海·黄浦区             每日手机线上办公       NaN     NaN      NaN  \n",
       "102572   上海·黄浦区             每日手机线上办公       NaN     NaN      NaN  \n",
       "\n",
       "[4986 rows x 13 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select every row that has at least one missing value, each row exactly once.\n",
    "# The previous form, `lagou[lagou.isnull().values == True]`, listed the same row repeatedly\n",
    "# (apparently once per missing cell), which inflated the displayed row count.\n",
    "lagou[lagou.isnull().any(axis=1)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 去掉带空值的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 101438 entries, 0 to 102642\n",
      "Data columns (total 13 columns):\n",
      " #   Column    Non-Null Count   Dtype \n",
      "---  ------    --------------   ----- \n",
      " 0   id        101438 non-null  object\n",
      " 1   job       101438 non-null  object\n",
      " 2   jobtag    101438 non-null  object\n",
      " 3   salary    101438 non-null  object\n",
      " 4   edu       101438 non-null  object\n",
      " 5   exp       101438 non-null  object\n",
      " 6   company   101438 non-null  object\n",
      " 7   pubdate   101438 non-null  object\n",
      " 8   address   101438 non-null  object\n",
      " 9   benefits  101438 non-null  object\n",
      " 10  comtype   101438 non-null  object\n",
      " 11  finance   101438 non-null  object\n",
      " 12  comsize   101438 non-null  object\n",
      "dtypes: object(13)\n",
      "memory usage: 10.8+ MB\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "None"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Drop every row that has a missing value in any column (in place).\n",
    "lagou.dropna(inplace=True)\n",
    "# info() prints directly and returns None; wrapping it in display() would only add a stray `None` output.\n",
    "lagou.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 检测是否还有空值的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [id, job, jobtag, salary, edu, exp, company, pubdate, address, benefits, comtype, finance, comsize]\n",
       "Index: []"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: list rows that still contain a missing value; an empty result means none are left.\n",
    "# A row-wise any() mask selects each such row once instead of repeating it.\n",
    "lagou[lagou.isnull().any(axis=1)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 检测薪资的值都有哪些"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67150</th>\n",
       "      <td>66914</td>\n",
       "      <td>品牌经理</td>\n",
       "      <td>教育,广告营销,品牌管理,品牌规划,形象建设,品牌传播/推广</td>\n",
       "      <td>25k-35k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验5-10年</td>\n",
       "      <td>UniCareer职优你</td>\n",
       "      <td>2019-08-23</td>\n",
       "      <td>北京·朝阳区</td>\n",
       "      <td>五险一金；带薪年假；</td>\n",
       "      <td>教育</td>\n",
       "      <td>C轮</td>\n",
       "      <td>150-500人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17423</th>\n",
       "      <td>17394</td>\n",
       "      <td>网络工程师</td>\n",
       "      <td>机房运维</td>\n",
       "      <td>10k-15k</td>\n",
       "      <td>不限</td>\n",
       "      <td>经验1-3年</td>\n",
       "      <td>维网科技</td>\n",
       "      <td>2019-08-12</td>\n",
       "      <td>张家口·张北县</td>\n",
       "      <td>福利待遇</td>\n",
       "      <td>企业服务,数据服务</td>\n",
       "      <td>未融资</td>\n",
       "      <td>50-150人</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39485</th>\n",
       "      <td>39378</td>\n",
       "      <td>APP交互设计师</td>\n",
       "      <td>硬件交互,移动交互</td>\n",
       "      <td>20k-30k</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小牛电动车</td>\n",
       "      <td>2019-08-22</td>\n",
       "      <td>北京·朝阳区</td>\n",
       "      <td>年终奖 商业保险 生日福利 员工培训</td>\n",
       "      <td>电商,硬件</td>\n",
       "      <td>上市公司</td>\n",
       "      <td>500-2000人</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          id       job                          jobtag   salary edu      exp  \\\n",
       "67150  66914      品牌经理  教育,广告营销,品牌管理,品牌规划,形象建设,品牌传播/推广  25k-35k  本科  经验5-10年   \n",
       "17423  17394     网络工程师                            机房运维  10k-15k  不限   经验1-3年   \n",
       "39485  39378  APP交互设计师                       硬件交互,移动交互  20k-30k  本科   经验3-5年   \n",
       "\n",
       "            company     pubdate  address            benefits    comtype  \\\n",
       "67150  UniCareer职优你  2019-08-23   北京·朝阳区          五险一金；带薪年假；         教育   \n",
       "17423          维网科技  2019-08-12  张家口·张北县                福利待遇  企业服务,数据服务   \n",
       "39485         小牛电动车  2019-08-22   北京·朝阳区  年终奖 商业保险 生日福利 员工培训      电商,硬件   \n",
       "\n",
       "      finance    comsize  \n",
       "67150      C轮   150-500人  \n",
       "17423     未融资    50-150人  \n",
       "39485    上市公司  500-2000人  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>salary</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>90000-99000</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27000-42000</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27000-54000</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28000-33000</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28000-51000</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8000-15000</th>\n",
       "      <td>4341</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10000-15000</th>\n",
       "      <td>4965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15000-30000</th>\n",
       "      <td>6115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15000-25000</th>\n",
       "      <td>6568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10000-20000</th>\n",
       "      <td>7116</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>418 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             number\n",
       "salary             \n",
       "90000-99000       1\n",
       "27000-42000       1\n",
       "27000-54000       1\n",
       "28000-33000       1\n",
       "28000-51000       1\n",
       "...             ...\n",
       "8000-15000     4341\n",
       "10000-15000    4965\n",
       "15000-30000    6115\n",
       "15000-25000    6568\n",
       "10000-20000    7116\n",
       "\n",
       "[418 rows x 1 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show three randomly chosen rows to get a feel for the raw salary strings.\n",
    "display(lagou.sample(3))\n",
    "\n",
    "# Drop rows whose salary text contains '以上' ('or above'); `~` negates the boolean mask.\n",
    "# .copy() makes the filtered result an independent frame, so the column assignments\n",
    "# below are not treated as writes to a slice of the previous frame (SettingWithCopyWarning).\n",
    "lagou = lagou[~lagou['salary'].str.contains('以上')].copy()\n",
    "\n",
    "# Expand the thousands suffix in a single pass: every 'k' or 'K' becomes '000'.\n",
    "# regex=True is explicit because the default of str.replace differs between pandas versions.\n",
    "lagou['salary'] = lagou['salary'].str.replace('[kK]', '000', regex=True)\n",
    "\n",
    "# Helper column of ones, used for counting rows per group; it stays on the frame.\n",
    "lagou['number'] = 1\n",
    "# Count rows per distinct salary string and sort ascending by that count.\n",
    "lagou[['salary','number']].groupby('salary').agg({'number':'count'}).sort_values('number')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 将薪资进行拆分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       0      1\n",
       "0  20000  40000\n",
       "1  25000  35000\n",
       "2  20000  40000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job            jobtag       salary edu     exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年   \n",
       "1  2          高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年   \n",
       "2  3          Java开发工程师              Java  20000-40000  本科  经验3-5年   \n",
       "\n",
       "       company     pubdate address              benefits   comtype finance  \\\n",
       "0  阿拉丁控股集团有限公司  2019-08-21  北京·大望路             五险一金，周末双休        金融   不需要融资   \n",
       "1         小帮规划  2019-08-16   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮   \n",
       "2           玩吧  2019-08-20  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮   \n",
       "\n",
       "    comsize  number salary_lower salary_upper  \n",
       "0  150-500人       1        20000        40000  \n",
       "1  150-500人       1        25000        35000  \n",
       "2  150-500人       1        20000        40000  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# expand为True时，会把切割出来的内容当做一列\n",
    "# With expand=True each piece produced by the split becomes its own column.\n",
    "salary_bounds = lagou['salary'].str.split('-', expand=True)\n",
    "display(salary_bounds.head(3))\n",
    "# Store the two pieces on the frame as the lower and upper salary bound.\n",
    "lagou[['salary_lower', 'salary_upper']] = salary_bounds\n",
    "display(lagou.head(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 转换薪资类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 101430 entries, 0 to 102642\n",
      "Data columns (total 16 columns):\n",
      " #   Column        Non-Null Count   Dtype \n",
      "---  ------        --------------   ----- \n",
      " 0   id            101430 non-null  object\n",
      " 1   job           101430 non-null  object\n",
      " 2   jobtag        101430 non-null  object\n",
      " 3   salary        101430 non-null  object\n",
      " 4   edu           101430 non-null  object\n",
      " 5   exp           101430 non-null  object\n",
      " 6   company       101430 non-null  object\n",
      " 7   pubdate       101430 non-null  object\n",
      " 8   address       101430 non-null  object\n",
      " 9   benefits      101430 non-null  object\n",
      " 10  comtype       101430 non-null  object\n",
      " 11  finance       101430 non-null  object\n",
      " 12  comsize       101430 non-null  object\n",
      " 13  number        101430 non-null  int64 \n",
      " 14  salary_lower  101430 non-null  object\n",
      " 15  salary_upper  101430 non-null  object\n",
      "dtypes: int64(1), object(15)\n",
      "memory usage: 13.2+ MB\n",
      "-------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 101430 entries, 0 to 102642\n",
      "Data columns (total 16 columns):\n",
      " #   Column        Non-Null Count   Dtype \n",
      "---  ------        --------------   ----- \n",
      " 0   id            101430 non-null  object\n",
      " 1   job           101430 non-null  object\n",
      " 2   jobtag        101430 non-null  object\n",
      " 3   salary        101430 non-null  object\n",
      " 4   edu           101430 non-null  object\n",
      " 5   exp           101430 non-null  object\n",
      " 6   company       101430 non-null  object\n",
      " 7   pubdate       101430 non-null  object\n",
      " 8   address       101430 non-null  object\n",
      " 9   benefits      101430 non-null  object\n",
      " 10  comtype       101430 non-null  object\n",
      " 11  finance       101430 non-null  object\n",
      " 12  comsize       101430 non-null  object\n",
      " 13  number        101430 non-null  int64 \n",
      " 14  salary_lower  101430 non-null  int64 \n",
      " 15  salary_upper  101430 non-null  int64 \n",
      "dtypes: int64(3), object(13)\n",
      "memory usage: 13.2+ MB\n"
     ]
    }
   ],
   "source": [
    "# Dtypes before the conversion: the two salary-bound columns are still object (string) columns.\n",
    "lagou.info()\n",
    "print('-------------------------------')\n",
    "# Cast both salary bounds to 64-bit integers in a single column assignment.\n",
    "lagou[['salary_lower','salary_upper']] = lagou[['salary_lower','salary_upper']].astype('int64')\n",
    "# Dtypes after the conversion, to confirm the two columns are now int64.\n",
    "lagou.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 年、月、日分别拆分出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job            jobtag       salary edu     exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年   \n",
       "1  2          高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年   \n",
       "2  3          Java开发工程师              Java  20000-40000  本科  经验3-5年   \n",
       "\n",
       "       company     pubdate address              benefits   comtype finance  \\\n",
       "0  阿拉丁控股集团有限公司  2019-08-21  北京·大望路             五险一金，周末双休        金融   不需要融资   \n",
       "1         小帮规划  2019-08-16   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮   \n",
       "2           玩吧  2019-08-20  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮   \n",
       "\n",
       "    comsize  number  salary_lower  salary_upper  year month day  \n",
       "0  150-500人       1         20000         40000  2019    08  21  \n",
       "1  150-500人       1         25000         35000  2019    08  16  \n",
       "2  150-500人       1         20000         40000  2019    08  20  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job            jobtag       salary edu     exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年   \n",
       "1  2          高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年   \n",
       "2  3          Java开发工程师              Java  20000-40000  本科  经验3-5年   \n",
       "\n",
       "       company address              benefits   comtype finance   comsize  \\\n",
       "0  阿拉丁控股集团有限公司  北京·大望路             五险一金，周末双休        金融   不需要融资  150-500人   \n",
       "1         小帮规划   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮  150-500人   \n",
       "2           玩吧  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮  150-500人   \n",
       "\n",
       "   number  salary_lower  salary_upper  year month day  \n",
       "0       1         20000         40000  2019    08  21  \n",
       "1       1         25000         35000  2019    08  16  \n",
       "2       1         20000         40000  2019    08  20  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Break the 'YYYY-MM-DD' text in pubdate into separate year / month / day string columns.\n",
    "lagou[['year','month','day']] = lagou['pubdate'].str.split(pat = '-', expand = True)\n",
    "display(lagou.head(3))\n",
    "# The combined column is no longer needed once its parts exist, so remove it.\n",
    "del lagou['pubdate']\n",
    "display(lagou.head(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 年月日拼接出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Series.str.cat joins string columns element-wise: first_column.str.cat(other_columns, sep = ...).\n",
    "# Passing a list of Series concatenates several columns in one call, so no temporary\n",
    "# helper column has to be created and deleted afterwards.\n",
    "# Every column involved must hold strings; numeric columns have to be converted first\n",
    "# with astype(str) (the old alias np.str was removed in NumPy 1.24).\n",
    "lagou['pubdate'] = lagou['year'].str.cat([lagou['month'], lagou['day']], sep = '-')\n",
    "# Parse the rebuilt 'YYYY-MM-DD' strings into datetime values.\n",
    "lagou['pubdate'] = pd.to_datetime(lagou['pubdate'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 城市和区域拆分出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>city</th>\n",
       "      <th>area</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京</td>\n",
       "      <td>大望路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京</td>\n",
       "      <td>望京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id                job            jobtag       salary edu     exp  \\\n",
       "0  1  JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年   \n",
       "1  2          高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年   \n",
       "2  3          Java开发工程师              Java  20000-40000  本科  经验3-5年   \n",
       "\n",
       "       company address              benefits   comtype finance   comsize  \\\n",
       "0  阿拉丁控股集团有限公司  北京·大望路             五险一金，周末双休        金融   不需要融资  150-500人   \n",
       "1         小帮规划   北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮  150-500人   \n",
       "2           玩吧  北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮  150-500人   \n",
       "\n",
       "   number  salary_lower  salary_upper  year month day    pubdate city area  \n",
       "0       1         20000         40000  2019    08  21 2019-08-21   北京  大望路  \n",
       "1       1         25000         35000  2019    08  16 2019-08-16   北京   望京  \n",
       "2       1         20000         40000  2019    08  20 2019-08-20   北京  东城区  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Split 'city·area' on the first '·' only (n = 1): the result then never has more than two\n",
    "# columns, so an address containing a second '·' cannot break the two-column assignment.\n",
    "# Addresses without any '·' end up with a missing value in 'area'.\n",
    "lagou[['city','area']] = lagou['address'].str.split('·', n = 1, expand = True)\n",
    "display(lagou.head(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 看是否还有空值的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 101430 entries, 0 to 102642\n",
      "Data columns (total 21 columns):\n",
      " #   Column        Non-Null Count   Dtype         \n",
      "---  ------        --------------   -----         \n",
      " 0   id            101430 non-null  object        \n",
      " 1   job           101430 non-null  object        \n",
      " 2   jobtag        101430 non-null  object        \n",
      " 3   salary        101430 non-null  object        \n",
      " 4   edu           101430 non-null  object        \n",
      " 5   exp           101430 non-null  object        \n",
      " 6   company       101430 non-null  object        \n",
      " 7   address       101430 non-null  object        \n",
      " 8   benefits      101430 non-null  object        \n",
      " 9   comtype       101430 non-null  object        \n",
      " 10  finance       101430 non-null  object        \n",
      " 11  comsize       101430 non-null  object        \n",
      " 12  number        101430 non-null  int64         \n",
      " 13  salary_lower  101430 non-null  int64         \n",
      " 14  salary_upper  101430 non-null  int64         \n",
      " 15  year          101430 non-null  object        \n",
      " 16  month         101430 non-null  object        \n",
      " 17  day           101430 non-null  object        \n",
      " 18  pubdate       101430 non-null  datetime64[ns]\n",
      " 19  city          101430 non-null  object        \n",
      " 20  area          100633 non-null  object        \n",
      "dtypes: datetime64[ns](1), int64(3), object(17)\n",
      "memory usage: 17.0+ MB\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "101430"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column non-null counts: a column whose count is below the row count has missing values.\n",
    "lagou.info()\n",
    "# Total number of rows, for comparison with the non-null counts above.\n",
    "lagou.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 100633 entries, 0 to 102642\n",
      "Data columns (total 21 columns):\n",
      " #   Column        Non-Null Count   Dtype         \n",
      "---  ------        --------------   -----         \n",
      " 0   id            100633 non-null  object        \n",
      " 1   job           100633 non-null  object        \n",
      " 2   jobtag        100633 non-null  object        \n",
      " 3   salary        100633 non-null  object        \n",
      " 4   edu           100633 non-null  object        \n",
      " 5   exp           100633 non-null  object        \n",
      " 6   company       100633 non-null  object        \n",
      " 7   address       100633 non-null  object        \n",
      " 8   benefits      100633 non-null  object        \n",
      " 9   comtype       100633 non-null  object        \n",
      " 10  finance       100633 non-null  object        \n",
      " 11  comsize       100633 non-null  object        \n",
      " 12  number        100633 non-null  int64         \n",
      " 13  salary_lower  100633 non-null  int64         \n",
      " 14  salary_upper  100633 non-null  int64         \n",
      " 15  year          100633 non-null  object        \n",
      " 16  month         100633 non-null  object        \n",
      " 17  day           100633 non-null  object        \n",
      " 18  pubdate       100633 non-null  datetime64[ns]\n",
      " 19  city          100633 non-null  object        \n",
      " 20  area          100633 non-null  object        \n",
      "dtypes: datetime64[ns](1), int64(3), object(17)\n",
      "memory usage: 16.9+ MB\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "100633"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove every row that has a missing value in any column, modifying lagou in place.\n",
    "lagou.dropna(axis = 0, how = 'any', inplace = True)\n",
    "# Re-check the non-null counts and the remaining number of rows.\n",
    "lagou.info()\n",
    "lagou.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 设置ID作为行索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>city</th>\n",
       "      <th>area</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京</td>\n",
       "      <td>大望路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京</td>\n",
       "      <td>望京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  job            jobtag       salary edu     exp      company  \\\n",
       "id                                                                              \n",
       "1   JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年  阿拉丁控股集团有限公司   \n",
       "2           高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年         小帮规划   \n",
       "3           Java开发工程师              Java  20000-40000  本科  经验3-5年           玩吧   \n",
       "\n",
       "   address              benefits   comtype finance   comsize  number  \\\n",
       "id                                                                     \n",
       "1   北京·大望路             五险一金，周末双休        金融   不需要融资  150-500人       1   \n",
       "2    北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮  150-500人       1   \n",
       "3   北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮  150-500人       1   \n",
       "\n",
       "    salary_lower  salary_upper  year month day    pubdate city area  \n",
       "id                                                                   \n",
       "1          20000         40000  2019    08  21 2019-08-21   北京  大望路  \n",
       "2          25000         35000  2019    08  16 2019-08-16   北京   望京  \n",
       "3          20000         40000  2019    08  20 2019-08-20   北京  东城区  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# With drop = True the 'id' column is removed from the data columns once it becomes the index;\n",
    "# with drop = False it would be kept as a regular column as well.\n",
    "lagou.set_index(keys = 'id', drop = True, inplace = True)\n",
    "display(lagou.head(3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选取前7列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  job            jobtag       salary edu     exp      company  \\\n",
       "id                                                                              \n",
       "1   JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年  阿拉丁控股集团有限公司   \n",
       "2           高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年         小帮规划   \n",
       "3           Java开发工程师              Java  20000-40000  本科  经验3-5年           玩吧   \n",
       "\n",
       "   address  \n",
       "id          \n",
       "1   北京·大望路  \n",
       "2    北京·望京  \n",
       "3   北京·东城区  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# iloc takes the row selector first and the column selector second:\n",
    "# here all rows, and the columns at positions 0 through 6.\n",
    "lagou.iloc[:, :7].head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 选取除最后3列外的全部列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  job            jobtag       salary edu     exp      company  \\\n",
       "id                                                                              \n",
       "1   JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年  阿拉丁控股集团有限公司   \n",
       "2           高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年         小帮规划   \n",
       "3           Java开发工程师              Java  20000-40000  本科  经验3-5年           玩吧   \n",
       "\n",
       "   address              benefits   comtype finance   comsize  number  \\\n",
       "id                                                                     \n",
       "1   北京·大望路             五险一金，周末双休        金融   不需要融资  150-500人       1   \n",
       "2    北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮  150-500人       1   \n",
       "3   北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮  150-500人       1   \n",
       "\n",
       "    salary_lower  salary_upper  year month day  \n",
       "id                                              \n",
       "1          20000         40000  2019    08  21  \n",
       "2          25000         35000  2019    08  16  \n",
       "3          20000         40000  2019    08  20  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Positional selection: first three rows, every column except the last three.\n",
    "lagou.iloc[:3, :-3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 查看数据有多少行多少列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100633, 20)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Table dimensions as (number of rows, number of columns).\n",
    "(len(lagou.index), len(lagou.columns))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 统计总共有多少条招聘信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100633\n",
      "100633\n",
      "number    100633\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Count postings three ways, one per output line: the row count, the scalar\n",
    "# sum of 'number', and the sum of a one-column frame (prints as a Series).\n",
    "print(\n",
    "    len(lagou),\n",
    "    lagou['number'].sum(),\n",
    "    lagou[['number']].sum(),\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 统计所有职位的平均上限工资和下限工资是多少"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "salary_lower    12097.423310\n",
       "salary_upper    20701.449823\n",
       "dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise mean of the lower and upper salary bounds over all postings.\n",
    "lagou.loc[:, ['salary_lower', 'salary_upper']].mean(axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 找出上限工资大于80K的职位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>salary</th>\n",
       "      <th>salary_upper</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>23738</th>\n",
       "      <td>网站运营副总/合伙人</td>\n",
       "      <td>499000-500000</td>\n",
       "      <td>500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23751</th>\n",
       "      <td>义乌子公司总经理/合伙人</td>\n",
       "      <td>499000-500000</td>\n",
       "      <td>500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23584</th>\n",
       "      <td>CTO</td>\n",
       "      <td>200000-400000</td>\n",
       "      <td>400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11723</th>\n",
       "      <td>语音合成算法实习生</td>\n",
       "      <td>200000-300000</td>\n",
       "      <td>300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53377</th>\n",
       "      <td>新媒体运营实习生</td>\n",
       "      <td>200000-250000</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                job         salary  salary_upper\n",
       "id                                              \n",
       "23738    网站运营副总/合伙人  499000-500000        500000\n",
       "23751  义乌子公司总经理/合伙人  499000-500000        500000\n",
       "23584           CTO  200000-400000        400000\n",
       "11723     语音合成算法实习生  200000-300000        300000\n",
       "53377      新媒体运营实习生  200000-250000        250000"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep postings whose upper salary bound exceeds 80000, then show the five\n",
    "# largest upper bounds next to the job title and the salary range text.\n",
    "result = lagou.loc[lagou['salary_upper'].gt(80000)]\n",
    "(\n",
    "    result.loc[:, ['job', 'salary', 'salary_upper']]\n",
    "    .sort_values(by='salary_upper', ascending=False)\n",
    "    .head(5)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 找出北京地区上限工资大于80K的职位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>city</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>11723</th>\n",
       "      <td>语音合成算法实习生</td>\n",
       "      <td>300000</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53377</th>\n",
       "      <td>新媒体运营实习生</td>\n",
       "      <td>250000</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24871</th>\n",
       "      <td>安全开发实习生</td>\n",
       "      <td>250000</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5793</th>\n",
       "      <td>后端开发工程师（校招/PPS）(J12656)</td>\n",
       "      <td>240000</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>PHP实习研发工程师</td>\n",
       "      <td>200000</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           job  salary_upper city\n",
       "id                                               \n",
       "11723                语音合成算法实习生        300000   北京\n",
       "53377                 新媒体运营实习生        250000   北京\n",
       "24871                  安全开发实习生        250000   北京\n",
       "5793   后端开发工程师（校招/PPS）(J12656)        240000   北京\n",
       "977                 PHP实习研发工程师        200000   北京"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows must satisfy both conditions: upper salary bound above 80000 and a\n",
    "# 'city' value containing '北京'. The five largest upper bounds are shown.\n",
    "# NOTE(review): str.contains treats the pattern as a regular expression and\n",
    "# yields NaN for missing values - confirm 'city' has no missing entries.\n",
    "result = lagou.loc[lagou['salary_upper'].gt(80000) & lagou['city'].str.contains('北京')]\n",
    "(\n",
    "    result.loc[:, ['job', 'salary_upper', 'city']]\n",
    "    .sort_values(by='salary_upper', ascending=False)\n",
    "    .head(n=5)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 找出所有B轮和C轮公司的名称、职位、薪资和融资情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>company</th>\n",
       "      <th>finance</th>\n",
       "      <th>job</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>32827</th>\n",
       "      <td>呆萝卜</td>\n",
       "      <td>A轮</td>\n",
       "      <td>大数据开发工程师</td>\n",
       "      <td>20000-35000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63092</th>\n",
       "      <td>同盾科技</td>\n",
       "      <td>C轮</td>\n",
       "      <td>高级客户经理（银行）-01373</td>\n",
       "      <td>10000-20000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67529</th>\n",
       "      <td>坚果动力</td>\n",
       "      <td>A轮</td>\n",
       "      <td>系统策划</td>\n",
       "      <td>10000-15000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      company finance               job       salary\n",
       "id                                                  \n",
       "32827     呆萝卜      A轮          大数据开发工程师  20000-35000\n",
       "63092    同盾科技      C轮  高级客户经理（银行）-01373  10000-20000\n",
       "67529    坚果动力      A轮              系统策划  10000-15000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview three randomly chosen postings, restricted to the four columns\n",
    "# used in this section. The fixed seed makes the preview identical on every\n",
    "# re-run; without it the displayed rows change each time the cell executes.\n",
    "display(lagou[['company', 'finance', 'job', 'salary']].sample(n=3, random_state=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15791\n",
      "15791\n"
     ]
    }
   ],
   "source": [
    "# Count the postings whose 'finance' value is 'B轮' or 'C轮'. The column is\n",
    "# reached once with bracket access and once with attribute access; each\n",
    "# count is printed on its own line.\n",
    "print(\n",
    "    lagou['finance'].isin(['B轮','C轮']).sum(),\n",
    "    lagou.finance.isin(['B轮','C轮']).sum(),\n",
    "    sep='\\n',\n",
    ")\n",
    "\n",
    "# .loc[rows, columns]: the boolean mask keeps the rows where it is True and\n",
    "# the list names the four columns to keep. The selection is saved as CSV.\n",
    "res = lagou.loc[\n",
    "    lagou['finance'].isin(['B轮','C轮']),\n",
    "    ['company','finance','job','salary'],\n",
    "]\n",
    "res.to_csv('./dataset/res.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 一共多少家公司发布招聘职位？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17372"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "17372"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Number of distinct company names, computed two ways and displayed in turn:\n",
    "# the length of unique() and the value returned by nunique().\n",
    "res = lagou['company'].nunique()\n",
    "display(len(lagou['company'].unique()), res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 招聘需求最大的岗位是什么？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "      <th>jobtag</th>\n",
       "      <th>salary</th>\n",
       "      <th>edu</th>\n",
       "      <th>exp</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>benefits</th>\n",
       "      <th>comtype</th>\n",
       "      <th>finance</th>\n",
       "      <th>comsize</th>\n",
       "      <th>number</th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>pubdate</th>\n",
       "      <th>city</th>\n",
       "      <th>area</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>JAVA高级研发工程师—阿拉丁科技</td>\n",
       "      <td>Java,Android,IOS</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>阿拉丁控股集团有限公司</td>\n",
       "      <td>北京·大望路</td>\n",
       "      <td>五险一金，周末双休</td>\n",
       "      <td>金融</td>\n",
       "      <td>不需要融资</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>21</td>\n",
       "      <td>2019-08-21</td>\n",
       "      <td>北京</td>\n",
       "      <td>大望路</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>高级Java工程师</td>\n",
       "      <td>后端,Java,服务器端</td>\n",
       "      <td>25000-35000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>小帮规划</td>\n",
       "      <td>北京·望京</td>\n",
       "      <td>牛人多，氛围好，福利好</td>\n",
       "      <td>移动互联网,金融</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>25000</td>\n",
       "      <td>35000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>16</td>\n",
       "      <td>2019-08-16</td>\n",
       "      <td>北京</td>\n",
       "      <td>望京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Java开发工程师</td>\n",
       "      <td>Java</td>\n",
       "      <td>20000-40000</td>\n",
       "      <td>本科</td>\n",
       "      <td>经验3-5年</td>\n",
       "      <td>玩吧</td>\n",
       "      <td>北京·东城区</td>\n",
       "      <td>七险一金,弹性不打卡,免费午餐,季度旅游</td>\n",
       "      <td>社交</td>\n",
       "      <td>B轮</td>\n",
       "      <td>150-500人</td>\n",
       "      <td>1</td>\n",
       "      <td>20000</td>\n",
       "      <td>40000</td>\n",
       "      <td>2019</td>\n",
       "      <td>08</td>\n",
       "      <td>20</td>\n",
       "      <td>2019-08-20</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  job            jobtag       salary edu     exp      company  \\\n",
       "id                                                                              \n",
       "1   JAVA高级研发工程师—阿拉丁科技  Java,Android,IOS  20000-40000  本科  经验3-5年  阿拉丁控股集团有限公司   \n",
       "2           高级Java工程师      后端,Java,服务器端  25000-35000  本科  经验3-5年         小帮规划   \n",
       "3           Java开发工程师              Java  20000-40000  本科  经验3-5年           玩吧   \n",
       "\n",
       "   address              benefits   comtype finance   comsize  number  \\\n",
       "id                                                                     \n",
       "1   北京·大望路             五险一金，周末双休        金融   不需要融资  150-500人       1   \n",
       "2    北京·望京           牛人多，氛围好，福利好  移动互联网,金融      B轮  150-500人       1   \n",
       "3   北京·东城区  七险一金,弹性不打卡,免费午餐,季度旅游        社交      B轮  150-500人       1   \n",
       "\n",
       "    salary_lower  salary_upper  year month day    pubdate city area  \n",
       "id                                                                   \n",
       "1          20000         40000  2019    08  21 2019-08-21   北京  大望路  \n",
       "2          25000         35000  2019    08  16 2019-08-16   北京   望京  \n",
       "3          20000         40000  2019    08  20 2019-08-20   北京  东城区  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First three rows of the full table, all columns.\n",
    "lagou.iloc[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>job</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>产品经理</th>\n",
       "      <td>1222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>交互设计师</th>\n",
       "      <td>974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>销售经理</th>\n",
       "      <td>909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>测试工程师</th>\n",
       "      <td>776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>销售专员</th>\n",
       "      <td>664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>销售</th>\n",
       "      <td>638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>运维工程师</th>\n",
       "      <td>634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏策划</th>\n",
       "      <td>616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>视觉设计师</th>\n",
       "      <td>596</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>项目经理</th>\n",
       "      <td>577</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       number\n",
       "job          \n",
       "产品经理     1222\n",
       "交互设计师     974\n",
       "销售经理      909\n",
       "测试工程师     776\n",
       "销售专员      664\n",
       "销售        638\n",
       "运维工程师     634\n",
       "游戏策划      616\n",
       "视觉设计师     596\n",
       "项目经理      577"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total the 'number' column per job title, then list the ten largest totals.\n",
    "result = lagou.groupby('job')[['number']].sum()\n",
    "result.sort_values(by='number', ascending=False).head(10)\n",
    "# Alternative: lagou['job'].value_counts().head(10) counts rows per title,\n",
    "# which matches these sums only while every posting has number == 1."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 招聘需求最大的公司是哪家？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>company</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>字节跳动</th>\n",
       "      <td>894</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网易</th>\n",
       "      <td>695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美团点评</th>\n",
       "      <td>555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <td>507</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贝壳</th>\n",
       "      <td>468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小米</th>\n",
       "      <td>373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>爱奇艺</th>\n",
       "      <td>372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中国人寿</th>\n",
       "      <td>354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>涂鸦智能</th>\n",
       "      <td>254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <td>227</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         number\n",
       "company        \n",
       "字节跳动        894\n",
       "网易          695\n",
       "美团点评        555\n",
       "腾讯          507\n",
       "贝壳          468\n",
       "小米          373\n",
       "爱奇艺         372\n",
       "中国人寿        354\n",
       "涂鸦智能        254\n",
       "大搜车         227"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total the 'number' column per company and keep the ten largest totals.\n",
    "(\n",
    "    lagou.groupby('company')[['number']]\n",
    "    .sum()\n",
    "    .sort_values(by='number', ascending=False)\n",
    "    .head(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "字节跳动    894\n",
       "网易      695\n",
       "美团点评    555\n",
       "腾讯      507\n",
       "贝壳      468\n",
       "小米      373\n",
       "爱奇艺     372\n",
       "中国人寿    354\n",
       "涂鸦智能    254\n",
       "大搜车     227\n",
       "Name: company, dtype: int64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# value_counts() tallies how many rows carry each distinct value of a column.\n",
    "# With sort=True and ascending=False (the defaults, spelled out here) the\n",
    "# largest tallies come first, so head(10) is the ten most frequent companies.\n",
    "lagou['company'].value_counts(sort=True, ascending=False).head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 每个公司的下限薪资和上限薪资的平均值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary_lower</th>\n",
       "      <th>salary_upper</th>\n",
       "      <th>salary_avg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>company</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>深圳德璞医疗健康管理有限公司</th>\n",
       "      <td>200000.0</td>\n",
       "      <td>400000.0</td>\n",
       "      <td>300000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>谷趣（上海）信息科技有限公司</th>\n",
       "      <td>120000.0</td>\n",
       "      <td>150000.0</td>\n",
       "      <td>135000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>婚恋文化产业</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>200000.0</td>\n",
       "      <td>150000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>能量吧</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>150000.0</td>\n",
       "      <td>125000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>强国院</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>147500.0</td>\n",
       "      <td>123750.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>高晟财富</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>140000.0</td>\n",
       "      <td>120000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>和煦咨询</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>120000.0</td>\n",
       "      <td>110000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>米米乐活</th>\n",
       "      <td>100000.0</td>\n",
       "      <td>110000.0</td>\n",
       "      <td>105000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京国鼎源创智能科技有限公司</th>\n",
       "      <td>90000.0</td>\n",
       "      <td>160000.0</td>\n",
       "      <td>125000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>兰西启点</th>\n",
       "      <td>90000.0</td>\n",
       "      <td>100000.0</td>\n",
       "      <td>95000.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                salary_lower  salary_upper  salary_avg\n",
       "company                                               \n",
       "深圳德璞医疗健康管理有限公司      200000.0      400000.0    300000.0\n",
       "谷趣（上海）信息科技有限公司      120000.0      150000.0    135000.0\n",
       "婚恋文化产业              100000.0      200000.0    150000.0\n",
       "能量吧                 100000.0      150000.0    125000.0\n",
       "强国院                 100000.0      147500.0    123750.0\n",
       "高晟财富                100000.0      140000.0    120000.0\n",
       "和煦咨询                100000.0      120000.0    110000.0\n",
       "米米乐活                100000.0      110000.0    105000.0\n",
       "北京国鼎源创智能科技有限公司       90000.0      160000.0    125000.0\n",
       "兰西启点                 90000.0      100000.0     95000.0"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Store both salary bounds as 64-bit integers (this updates lagou itself).\n",
    "lagou[['salary_lower','salary_upper']] = lagou[['salary_lower','salary_upper']].astype('int64')\n",
    "\n",
    "# Mean lower and upper bound per company, ordered from the highest mean lower\n",
    "# bound (ties broken by the mean upper bound), plus the midpoint of the two\n",
    "# means as 'salary_avg'.\n",
    "result = (\n",
    "    lagou.groupby('company')[['salary_lower','salary_upper']]\n",
    "    .mean()\n",
    "    .sort_values(by=['salary_lower','salary_upper'], ascending=False)\n",
    "    .assign(salary_avg=lambda means: (means['salary_lower'] + means['salary_upper']) / 2)\n",
    ")\n",
    "result.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 每个学历的平均薪资是多少？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary_avg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>博士</th>\n",
       "      <td>38867.346939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>硕士</th>\n",
       "      <td>28521.348860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>本科</th>\n",
       "      <td>19052.889025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>不限</th>\n",
       "      <td>12690.794122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大专</th>\n",
       "      <td>11306.578793</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       salary_avg\n",
       "edu              \n",
       "博士   38867.346939\n",
       "硕士   28521.348860\n",
       "本科   19052.889025\n",
       "不限   12690.794122\n",
       "大专   11306.578793"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 先求平均薪资\n",
    "lagou['salary_avg'] = (lagou['salary_lower'] + lagou['salary_upper']) / 2\n",
    "result = lagou[['edu','salary_avg']].groupby('edu').agg({'salary_avg':'mean'})\n",
    "result.sort_values('salary_avg',ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 每个工作经验级别的平均薪资是多少？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary_avg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>exp</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>经验10年以上</th>\n",
       "      <td>42302.752294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验5-10年</th>\n",
       "      <td>28662.487910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验3-5年</th>\n",
       "      <td>18946.285011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验1-3年</th>\n",
       "      <td>11667.812387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验不限</th>\n",
       "      <td>11263.910434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验1年以下</th>\n",
       "      <td>8035.658511</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>经验应届毕业生</th>\n",
       "      <td>7718.497110</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           salary_avg\n",
       "exp                  \n",
       "经验10年以上  42302.752294\n",
       "经验5-10年  28662.487910\n",
       "经验3-5年   18946.285011\n",
       "经验1-3年   11667.812387\n",
       "经验不限     11263.910434\n",
       "经验1年以下    8035.658511\n",
       "经验应届毕业生   7718.497110"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result = lagou[['exp','salary_avg']].groupby('exp').agg({'salary_avg':'mean'})\n",
    "result.sort_values(['salary_avg'],ascending = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 每个城市的平均工资"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>salary_avg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>city</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>毕节</th>\n",
       "      <td>51500.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>黄冈</th>\n",
       "      <td>35000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>香港特别行政区</th>\n",
       "      <td>30555.555556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>金华</th>\n",
       "      <td>27745.614035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>定西</th>\n",
       "      <td>25000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>红河</th>\n",
       "      <td>24500.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京</th>\n",
       "      <td>21109.011978</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>淮北</th>\n",
       "      <td>20000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>宣城</th>\n",
       "      <td>20000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海</th>\n",
       "      <td>18617.761747</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           salary_avg\n",
       "city                 \n",
       "毕节       51500.000000\n",
       "黄冈       35000.000000\n",
       "香港特别行政区  30555.555556\n",
       "金华       27745.614035\n",
       "定西       25000.000000\n",
       "红河       24500.000000\n",
       "北京       21109.011978\n",
       "淮北       20000.000000\n",
       "宣城       20000.000000\n",
       "上海       18617.761747"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result1 = lagou[['city','salary_avg']].groupby('city').agg({'salary_avg':'mean'})\n",
    "result1.sort_values('salary_avg',ascending = False).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "city\n",
       "毕节         51500.000000\n",
       "黄冈         35000.000000\n",
       "香港特别行政区    30555.555556\n",
       "金华         27745.614035\n",
       "定西         25000.000000\n",
       "红河         24500.000000\n",
       "北京         21109.011978\n",
       "宣城         20000.000000\n",
       "淮北         20000.000000\n",
       "上海         18617.761747\n",
       "Name: salary_avg, dtype: float64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lagou.groupby('city').salary_avg.mean().sort_values(ascending = False).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>city</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>北京</th>\n",
       "      <td>24878</td>\n",
       "      <td>21109.011978</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>深圳</th>\n",
       "      <td>16124</td>\n",
       "      <td>17269.101960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海</th>\n",
       "      <td>15387</td>\n",
       "      <td>18617.761747</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州</th>\n",
       "      <td>14068</td>\n",
       "      <td>13006.966164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>杭州</th>\n",
       "      <td>8646</td>\n",
       "      <td>16451.596114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>成都</th>\n",
       "      <td>5588</td>\n",
       "      <td>11460.719399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>武汉</th>\n",
       "      <td>2910</td>\n",
       "      <td>11237.113402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>南京</th>\n",
       "      <td>1412</td>\n",
       "      <td>12923.158640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>厦门</th>\n",
       "      <td>1129</td>\n",
       "      <td>12509.300266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>长沙</th>\n",
       "      <td>1032</td>\n",
       "      <td>10427.810078</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      count          mean\n",
       "city                     \n",
       "北京    24878  21109.011978\n",
       "深圳    16124  17269.101960\n",
       "上海    15387  18617.761747\n",
       "广州    14068  13006.966164\n",
       "杭州     8646  16451.596114\n",
       "成都     5588  11460.719399\n",
       "武汉     2910  11237.113402\n",
       "南京     1412  12923.158640\n",
       "厦门     1129  12509.300266\n",
       "长沙     1032  10427.810078"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lagou.groupby('city').salary_avg.agg(['count','mean']).sort_values(['count','mean'],ascending = False).head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 找出所有以java开头的职位名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>job</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>java开发工程师</th>\n",
       "      <td>290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java工程师</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java高级开发工程师</th>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java开发</th>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java架构师</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java后端</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java技术经理</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>javascript</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java研发工程师</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             number\n",
       "job                \n",
       "java开发工程师       290\n",
       "java             36\n",
       "java工程师          36\n",
       "java高级开发工程师      25\n",
       "java开发           23\n",
       "java架构师          13\n",
       "java后端            8\n",
       "java技术经理          7\n",
       "javascript        5\n",
       "java研发工程师         5"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "76\n"
     ]
    }
   ],
   "source": [
    "res = lagou['job'].str.startswith('java')\n",
    "r = lagou[res].groupby('job').agg({'number':'count'}).sort_values(['number'],ascending = False)\n",
    "display(r.head(10))\n",
    "print(len(r))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>job</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>java开发工程师</th>\n",
       "      <td>290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Java开发工程师</th>\n",
       "      <td>146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java工程师</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java</th>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java高级开发工程师</th>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java开发</th>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JAVA开发工程师</th>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Java工程师</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>java架构师</th>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Java高级开发工程师</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             number\n",
       "job                \n",
       "java开发工程师       290\n",
       "Java开发工程师       146\n",
       "java工程师          36\n",
       "java             36\n",
       "java高级开发工程师      25\n",
       "java开发           23\n",
       "JAVA开发工程师        21\n",
       "Java工程师          16\n",
       "java架构师          13\n",
       "Java高级开发工程师      12"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "279\n"
     ]
    }
   ],
   "source": [
    "r = lagou['job'].str.lower().str.startswith('java')\n",
    "r = lagou[r].groupby('job').agg({'number':'count'}).sort_values(['number'],ascending = False)\n",
    "display(r.head(10))\n",
    "print(len(r))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 求所有发布不同招聘岗位数量超过100的公司有那些，分别是多少？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>company</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>字节跳动</th>\n",
       "      <td>613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>美团点评</th>\n",
       "      <td>364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>网易</th>\n",
       "      <td>321</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <td>310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小米</th>\n",
       "      <td>232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贝壳</th>\n",
       "      <td>201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <td>154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>拼多多</th>\n",
       "      <td>148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>爱奇艺</th>\n",
       "      <td>148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>作业帮</th>\n",
       "      <td>134</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         job\n",
       "company     \n",
       "字节跳动     613\n",
       "美团点评     364\n",
       "网易       321\n",
       "腾讯       310\n",
       "小米       232\n",
       "贝壳       201\n",
       "大搜车      154\n",
       "拼多多      148\n",
       "爱奇艺      148\n",
       "作业帮      134"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 先按照 公司名称 和 职位去重，然后按照 公司名称  统计值的个数\n",
    "result = lagou.drop_duplicates(['company','job']).groupby('company').agg({'job':'count'})\n",
    "result = result[result['job'] > 100].sort_values(['job'],ascending = False)\n",
    "result.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 把上题需求使用柱形图表示出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ee003c3880>"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXMAAAEiCAYAAAALaDLAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5yVZb338c+XAQTxCBJqRsgTHiIVcVJIRXCrSWgHy52amXagx8cyD6WWVmzbmbk7mGUWhtlBKzykpmm4t5po8BgkqaGVFhoWiYJ4SDzgb/9xXYtZDGtgZNZ9z8zt9/16zWutdd2z1vWDWeu3rvs63YoIzMysd+vT3QGYmVnXOZmbmVWAk7mZWQU4mZuZVYCTuZlZBfTtjkq32mqrGDFiRHdUbWbWa82fP//xiBja6Fi3JPMRI0Ywb9687qjazKzXkvRwR8fczWJmVgFO5mZmFeBkbmZWAd3SZ25mVoQXX3yRxYsXs3Llyu4OpUsGDBjAdtttR79+/Tr9HCdzM6uMxYsXs+mmmzJixAgkdXc4GyQieOKJJ1i8eDHbb799p5/nbhYzq4yVK1cyZMiQXpvIASQxZMiQV3x24WRuZpXSmxN5zYb8G9zNYmaVNeKMG5r6eovOndLU12smt8zNzAqwYMECLr300obHjj32WBYtWtTU+npsy7yr36g9+RvUzKpvzJgxjBkzprT63DI3MyvAbbfdxrRp01i2bBmHHnoo++67LyeddNLq46effjp77703J598clPq63Qyl/RtSYfm+zMkzZF0Vt3xtcrMzF7tzjnnHI444ghmz57NihUruOmmmwCYMmUKd955JwsXLmTBggVdrqdTyVzSvsDWEfELSYcBLRExHhgpaVSjsi5HZmZWAQsXLmSvvfYCYK+99uL+++9ffR9g7NixPPTQQ12uZ73JXFI/4GJgkaR3ABOBmfnwLGCfDsrav85USfMkzVu6dGmXAzcz6w1Gjx7N3LlzAZg7dy6jR48GYP78+QDcc889NGNL8M4MgB4DLATOAz4OnADMyMeWAWOBQcCj7crWEBHTgekAra2t0aWozcw6oTsnQjz33HNstNFGnHjiiRxzzDF85zvfobW1lYMOOojLL7+cK6+8kgsvvJDdd9+dPfbYo8v1dSaZ7w5Mj4glkn4MvAUYmI9tQmrdP9OgzMzsVWfRokUceeSRtLS0cPHFFzN48GCuv/76NX6noymLXdGZZP4gMDLfbwVGkLpR5gK7AX8EFjcoMzN71RkxYgRz5swpvd7OJPMZwCWSjgD6kfrHr5O0LTAZGAcEMLtdmZlZ6SKi1y/pj3jlPdHr7Q6JiKcj4vCImBAR4yPiYVJCnwtMiogVEfFU+7JXHImZWRcNGDCAJ554YoOSYU9R2zVxwIABr+h5G7QCNCKW0zZ7pcMyM7MybbfddixevJjePmOutp/5K9Fjl/Obmb1S/fr1e0V7gFeJZ52YmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFrDOZS+or6RFJt+WfXSTNkDRH0ll1v7dWmZmZlWd9LfNdgZ9ExMSImAiMAloiYjwwUtIoSYe1Lys2ZDMza6/veo6PAw6RNAm4F3gemJmPzQL2AXZvUPbn9i8kaSowFWD48OFdDtzMzNqsr2X+W+CAiNgT6AdMBh7Nx5YBw4BBDcrWEhHTI6I1IlqHDh3a5cDNzKzN+pL5PRHxj3x/HrAVMDA/3iQ//5kGZWZmVqL1Jd4fSdpNUgvwTuAEUjcKwG7AImB+gzIzMyvR+vrMzwYuBwRcB1wDzJa0LanLZRwQDcrMzKxE60zmEXEfaUbLapIm
AgcC50XEio7KzMysPOtrma8lIpbTNnulwzIzMyuPByvNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCqgU8lc0jBJd+f7MyTNkXRW3fG1yszMrDydbZl/BRgo6TCgJSLGAyMljWpUVlSwZmbW2HqTuaT9gWeBJcBEYGY+NAvYp4OyRq8zVdI8SfOWLl3atajNzGwN60zmkvoDnwXOyEWDgEfz/WXAsA7K1hIR0yOiNSJahw4d2tW4zcyszvpa5mcA346IJ/PjZ4CB+f4m+fmNyszMrETrS7wHACdIug0YAxxKWzfKbsAiYH6DMjMzK1HfdR2MiAm1+zmhvx2YLWlbYDIwDogGZWZmVqJOd4lExMSIeIo04DkXmBQRKxqVFRGomZl1bJ0t80YiYjlts1c6LDMzs/J4sNLMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCXvFslleTEWfc0OXXWHTulCZEYma2bm6Zm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgHdN7AW6unujd240qz63zM3MKsDJ3MysApzMzcwqoFN95pIGA3sAd0fE48WGZD2Rr7pk1rOtt2UuaUvgemBP4FZJQyXNkDRH0ll1v7dWmZmZlaMz3Sy7AqdExBeBXwH7Ay0RMR4YKWmUpMPalxUXspmZtbfebpaI+DWApAmk1vlgYGY+PAvYB9i9Qdmf619H0lRgKsDw4cObELqZmdV0agBUkoD3AsuBAB7Nh5YBw4BBDcrWEBHTI6I1IlqHDh3a1bjNzKxOp5J5JCcA9wBvAQbmQ5vk13imQZmZmZWkMwOgp0s6Jj/cAjiX1I0CsBuwCJjfoMzMzErSmamJ04GZkj4M3AdcA9wuaVtgMjCO1PUyu12ZmZmVpDMDoMuBA+vLJE3MZedFxIqOyszMrBwbtNFWTvAz11dmZmbl8EClmVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVcAGXZzCrDuMOOOGLr/GonOnNCESs57HLXMzswpwMjczqwAnczOzCnAyNzOrACdzM7MKcDI3M6sAJ3MzswpwMjczq4D1JnNJm0u6UdIsST+X1F/SDElzJJ1V93trlZmZWTk60zJ/H/C1iDgIWAIcAbRExHhgpKRRkg5rX1ZcyGZm1t56l/NHxLfrHg4FjgbOz49nAfsAuwMz25X9uf51JE0FpgIMHz68S0GbdSdvK2A9Uaf7zCWNB7YE/gY8mouXAcOAQQ3K1hAR0yOiNSJahw4d2qWgzcxsTZ1K5pIGA98EPgg8AwzMhzbJr9GozMzMStKZAdD+wBXApyPiYWA+qRsFYDdgUQdlZmZWks5sgfshYCxwpqQzge8D75e0LTAZGAcEMLtdmZmZlaQzA6AXARfVl0m6DjgQOC8iVuSyie3LzMysHBt0cYqIWE7b7JUOy8zMrBweqDQzqwAnczOzCnAyNzOrACdzM7MKcDI3M6sAJ3MzswpwMjczqwAnczOzCnAyNzOrgA1aAWpm3ct7qlt7bpmbmVWAW+ZmtsF6whlCT4ihJ3DL3MysApzMzcwqwMnczKwCnMzNzCrAA6BmZk3Q3QOxbpmbmVWAk7mZWQU4mZuZVYCTuZlZBTiZm5lVgJO5mVkFOJmbmVWAk7mZWQV0KplLGiZpdt3jGZLmSDprXWVmZlaO9SZzSVsCPwAG5ceHAS0RMR4YKWlUo7IigzYzszV1pmW+Cngv8FR+PBGY
me/PAvbpoGwNkqZKmidp3tKlS7sQspmZtbfeZB4RT0XEirqiQcCj+f4yYFgHZe1fZ3pEtEZE69ChQ7sWtZmZrWFDBkCfAQbm+5vk12hUZmZmJdmQpDuftm6U3YBFHZSZmVlJNmQL3GuA2ZK2BSYD44BoUGZmZiXpdMs8Iibm26dIA55zgUkRsaJRWdMjNTOzDm3QxSkiYjlts1c6LDMzs3J4oNLMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKczM3MKsDJ3MysApzMzcwqwMnczKwCnMzNzCrAydzMrAKamswlzZA0R9JZzXxdMzNbt6Ylc0mHAS0RMR4YKWlUs17bzMzWTRHRnBeSLgBuiohfSjoCGBgR3687PhWYmh/uCPyxi1VuBTzexdfoqp4QA/SMOHpCDNAz4ugJMUDPiKMnxAA9I45mxPD6iBja6EDfLr5wvUHAo/n+MmBs/cGImA5Mb1ZlkuZFRGuzXq+3xtBT4ugJMfSUOHpCDD0ljp4QQ0+Jo+gYmtln/gwwMN/fpMmvbWZm69DMhDsf2Cff3w1Y1MTXNjOzdWhmN8s1wGxJ2wKTgXFNfO1GmtZl0wU9IQboGXH0hBigZ8TRE2KAnhFHT4gBekYchcbQtAFQAElbAgcCt0fEkqa9sJmZrVNTk7mZmXUPD1KamVWAk7mZWQU4mZuZVUCvSuaS3inpHZL+TdI4SW+SNLykuiVpJ0ktecYOkvp4H5o1SXqtpB9IOqrkeqdK6p/v7yxpQsn1K9+2SOovqW9+PFzSwHU/u9C4/l831r1FN9b9/gZlfSSNLqn+UXX33yypX9F19qpkDnwE2Jq0uvRtwIeAL0v6Xgl19wW+BewM3CrpcmAHoOyk0bfufh9J50i6RNIXJH1U0mtLimPjWvKsK9sE2JK0ArjwJJK/XLfJD48GaqP5p1Pi30XSjsDxkiYBRwALgCn58IHAb0qK4+x82yLpw7n4g2XUXRdDfU65UtL0/L4o2wdyPLvUlY3OMX2hhPp/K+knkr4E/DtwlqTjJH2+qC/YZs4zL4ykccDTwErgBmBpRDxfd/yWomOIiBclvRQR9wE7ShoP/KvoehtYIimAB4Hvk+bzfxgYQXqz/oRyEtm1wAv5y0W5bCWwBPgtUMYZy/bA+ZKOA17Kf6MpwE6kL/6yvAQ8D3wW+B7w5Yi4FiAiZuT4yrBfvg3g/TmW50qqu+Z6Se+JiH8BLaT36GWS3hcRz5QYxwv59huSPhUR8yPiXkm7AjeWUP/dwKnALhHxK0mHk96TnwY+ImlCRNzezAp7fDLPpycfIH1gdgI+BwyVtDHwe+Ai4KCSwtlLUn0rS8BOuUzAoIjYteAY7iZ9WD8NtOZ6pwFnRsQtkkrbfyIipsDqs4X/ioiTy6o713+jpIXAkBzHJ4BJwMER8WKZsWT1DYyfA8OAfsCAkup/GSAiXpa0KpeVPfd4G+BsSTcDG0fEHEkzgK8CHy0jAEm/Bt4o6X9y0aeB98DqRlkZPRI7A99J4egEYEWuf76kJ0hnr03V45N5/lAeDyDp8oio7bxYO4X6PqkF8uMSwrkrIt6a696BtLHYNRFxYAl1txd1t18FfiTpPyLiA2UFIOmYuocT2j3uHxGFdn9J+hSwivSFNhw4CpgJfDB3YfeNiPOKjCGrnQ0MrivbPCLeUkLdwOouLiQdDbwLGC1pJqmxMZP0f3FYCaE8CfyA1OjaIX9GTwGeljQkIp4oOoCI2E/SjRExWdKNwDJJB0XErKLrrvNa4HDgLmB/UtfbzyR9Lh/vRzqTa5oen8xrJJ0BnCnpiog4PCeOzwKfIm2pW3T9fYEWSVsBPyWdvp5OWxdDdxFwJHAIcK2khRHxz5LqfY7cGiQl1Wfrjj+/1jOa70naWjgr8+PHaOv+
6t/oSQUYTjo7KKsFvgZJryd1P24C3AzcAVwOnAxcTTrdL3wALrdAAxhF+kweQ0rq7yd1Bx4NfKOEOLZIN+qf4/kk8AtJd+TunzIEcCzp7OytpG7JpfkWCnhv9qYB0IMi4q/A5pJaSDs0/gJYEBFfLqH+VcCpEfE4cExEHAo8AFyTBzV2LiGG9k4DXkf68J4NfIz04SnDpcBVEXEV6Q06M99/HhgQET8rOoCIuJjU7fRX0gflRtIHd0VEXBURPyk6huwiYBZtW0ADPCfpVkk/kjSsyMoj4mHgzaRT+R/k4pcj4lHghYj4W0T8pcgYsuWkAfDBwGdIX/jLgO2Am4B/KyEGSGfpY4DrSWeITwHnAReX0cWSu4CvA3YF3keaKNEHeCoifp9/ftvsentTMn8p37aQTqUfBO4l9U0VLtK+B++S9PmI+LukL5LODIbkn6tLmor1JmAX4IukjXv+Thocfh7YAtishBggtSxOkbQ5aZbP65UuUPI24KK62RSFkbQ36QM7iPQnOh94O/B1SfsWXX8DG+fbiIgpETEJ+Dnw7aIrjojnSMn0I8DFwGuKrrNBDJeTujz7ArcBhwLnAEdExNOs2Q1VZByHAL+LiIPIOS4ibiB9Vq6kgP7qdvX/i3S2fG+ubwnpbH6opOsk/TJP6miqXtPNQjqFhNRCPol0CncUcAUljE7nUfCWiKj1cx0K/F/auln+QPowP1lwKK2kLoUWUrfGDNIp9juBdwOFz5xQmtv/FtKA1muBTUmDTLX/i+9Rwmk9sBDYOyKW5+l4ioiHcxfcTEk7RcSq9b1IE/QFNgIuI/0fnCzphYj4aURcLek8SZtHxIqC49g4Iv4m6Uig1j++8bqe0GwRcaGkL0XEM5I2yn+P2sD4/SWGUpvbf3Vd2WmknHF3CfU/TzpT+jOp2+8jwPSIeHtRFfaajbYkDYqIZyWdXutWkTQS6BMRD5YcS1/gnIg4rcx6c91vJCXzen1pO3P5R26llRXPIFJS/zmp5fVARDy77mcVEsdU0mD4S6QP8oiIWFhS3f1IM5mezAORQ0hdPU9K2gwgn+oXHUefiHi5Xdlx9ZdvfLWQNDYiftfdcQBIGhMRCyRtXeRusr0pme8UEQ/UPX5NRDwm6WPAgxFxUzeGR+5ueFNE3FlwPUtI/bOri2ib2dIfGF70LIo84DaM1Pr7IfB1Ur/15cB/kFokWwGHRETRZyq1mPqRvlQ+BkyLiJ+WUW+u+1DgRdoGg/uQkvkcSdcDPy4znkYkHQzcEhEvrPeXm1fnFmX9/RvUXcsPbwSGRMTsboqjX/tpsnlg9riI+G4z6+oVfeaSBgNfUVomvlmeT3x+PnwfKZmUFctt+XYXSTvm21bg18DrSwjhftKp49akmQsnk+bbbxQRR5D66Yq2irSycFi+30palXsi6e/xA1IXyPYlxELuH/8dqUW8dzckzguAPYHx+WcvYFSeOrmkjHgkLcn9sY9I+q2kSUorg2vbXZxC6porOo5uXwGak+Vlkl5HuvrZbpK2zT/bqISl9XUW5L/L4ZI2ymWryCtUm6m39Jl/izS4Mxa4hLRI6DpJPyK1TMvoF62ptWxmkUbo3wq8AZhQxqk0aXDtGknzSEnzm6SBnQn5YBkLM/6dNGthFOnffxdwc0R8TdI04KMRcXAJcdQ8AEzKM426w6KIOLu+QNJepH7SSSXFMCMizpT0ZdKXex/SF8w3JP2MNLuljO63bl8BGhEvSHqJdOY4lLSlQm11bB9SQ2jvMmIhzWjZI8cyTVJrRDyntkVdTdNbkvm1wFOkwb6VwBdIyX0JKZmXdupY54GIOE7SraT/x7MkPR0Rhe/7oLRx05Gk/5eD888vJZ1QxJSnBr4L/DHHMJCUNFbm2Tyb09bdUCi1LRqqPa4//DJwZ0n/H5Hr/xywmHT92z8C74mIl9bxvGbaS9J1pM/DAaTPxGOklY8LgAtLiqPbV4DW2Z00GH9/RFxWK5RUeBz5vVD/5bWUNJ33
eKU3atP7t3tLMh9JenOOAB4hnTLeQZq/Opc0x3p50UFIejOwqaSxrPnH2IZ0ml/43GrSh/UQ0iq/m0itjlOBM4F/lFA/pLntk0j/5zeQVrjNIb2fppP60cvwd9YeDK7pSzqL26WD402VB8WfI40V7ExqcNwm6awoZ2BqekTMVFoB+jLwT9JZ29Wkv9EE0vLyonXrCtCcKN9NGj96jtzQU1pC/yfSnO8yWuXLSVOGSxuU7BXJPCK+JOlq4HHSCrLZwFdILdK3kXYxfFtE/L3gUD5BOqX/eLv4/pj7BScBZWz6dYWkuaQ5rBNIU58WRcTiouvO9T8g6UXSh+VI0jSszUjdLlOBByQdHhFXFBzHZbB68Hll1G2+lss3LbL+drG8BPxXu/o/RZpjfnwJIXwoD3JOJp0VnEPqUjg1Ih6UdJOk/kUOgKpnrAAdQDpT3JM1F3HNj4iDJN1cP5GiQC+QZlY1OkstZNV4r0jmkm4ntcJGkmI+mvQGeZrUF3UsUMaeD0fXxVRL2iFpJ9Ic1m8VHUOurz8pSRwXEQvzFM3zJH296Nk0dSaRPqw7kwZCf0TqBqstpLqctAagDBNJA+S3AZdExByAKHhvmDo7KC2Yqvf3iDhX0jWS9oyIuwqO4UjSfP8bSN0qrwP2rBvHuYTUWi2yS7L9CtA+rLkC9DIKTuZ5XOC0fEawirUnJZTVUt6FNKur9v9dq7ew7T96xdRESbPyt+rXSV0tA0l96JD3No+IP5Uc0zJgHmkmxxhgWRkDPJKWkxY9tO93E2nA6aWI2L+EOP5K6lrZlJTMHyK1huaSZtxcFRGlLRLJp9cHkM6alkXEsSXWvQ/pQ/sy6W/SB3gxzy3eDzg8Ij5WcAxfJSWvKaTuvncC19T9yvy8CrJQuXW+irSk/uvAfwKnRMQnlPZG2afoGHIcvySN7RxBWiV8Pm2f18Mj4rYy4sixjMxxHAAcEGlXy9sjoqlbVfeKljlpB7hLSMniYOBLtC2SeZw0Pa+MU9l6u5K6F/rnuM6WdGFEFN0vuQMd98WJtpWyhYqIhtMOc3fTvsB7SVvzliL3S98M3Ky0kKk0EXHHOg7fTvrSK9oVpC/WsaTW+Vak98nttI1lFJ7Me8IK0Hzm2h+4h3TWLtJZ/UpSQ7CMTeBqscwkdTnNBD6eE3lfCthoq7e0zLckLcroS2qRb03bH2Rj4LCIKHw3tnXJc0h3jogF3RmHvXoprTY9kLSE/AXgdRFxcz72uoj4WwkxDAG2i4jfSzoV+FpJA8Dt49iMlDBfA2xZ3/1Y9NhBXT3vBH4TEY/VlY0m/X2OjohLmlpfb0jmAJLeHhHXdXMMm5G+VBrNEW0h7dBW9P4bZh3KZyW3RMRedWVvAP6b1NgobK65pMmkWTTvIG0EdzPpywVS63ijotdi5C6vgaRL9X2e1F//m4j4Vt3v3BppE7RC5cHWA/Mipesj4q2S7oyIQmbT9JZuFkj7ll8n6ZvA/yG10B8Gzo2IwqclZo+QpiDC2gMZY0h92YX3V5utw7+oG+TMaxIuBU4sYdHQJNLssj6k7p7RpBlXyj/9Kf6qYCNzHdNIXbHHkKZkfkvSuRFxBiWtg6iJdHWj2qrTws4IelMyr+1v8CbSTJaBpCWxnyTNsS7DvRGxv6QLIuLE+gOSZpcx8Gi2LhERSteIrV0NazpwQRlntRFxWp5CvH9EnJNbwIXtEtiBe0jrPo4jzbCCttzxbkn/ouC++7x47ql8fwLpi2yLfL922zcimjqNuVckc0mzSYOg15Per4tz+XWUtCgk6y/pWGB/Se+nbUbJ7ZR/rUWz1XIXx+akQc9hedXlYGBqWTO9JH2G1N34AUkTgV3zFN7HgAtL2uxqMWkJ/0GkM4Fod+wl0oLDIr0HOIG09mJirvMq0sSAK/NtC01ek9IrkjlptePPSSPTq+cu57m7Rc/fba8Paa/u04CvkWYQfK3kGMzaG0y6fF0L6WIdryftd1/WpfMgbTa3AngyIr6Z
14cckGP5du4v/s8iA4iIx5UuWhKkFdGfJW16tjEpue5M+swWtvlZRHwvT408hjR+cAlp/UOhe0j1il0T8wZKL9Y2UpJ0Qbufi0oK5fk8Av0X0gDPe0hTjrp1Jo1ZRFwWEV/KyfKvEXEA6YIhF+SzyDJiuDMi7gMG53n//x0RL0e63OOhFH/hlpo/kVaIB2mu+1PA/5DWQ5xCuqBKYSRtD7whIs4lDQBvRQkN517RMpd0DzA8r/B7mbbrHEKJc6vrDIiIn0r6Q47lMLr/ws5mNQGQVwdPBm6StKQ2TbEoeeHSi6TVqBsB4yXVLnIdlLN3EaTVrz8jnSU8Q1qZeiRp8dCOQD9JAyKio319umob4FRJX6HtC2yS2jaC6wP0i4j9Gj15Q/WKZB4Ru0q6JQ8+3hIR87s5pHNyXPfms4ItcJ+59QB59srq/boj4nlJxwM3Stql4FXKV5GS+Vvr7i8lLZoaRbrc4wcLrL/mLtJZydmklvlVkS4teBjpTPqEAhM5EfEb4B2SXkNakfxe4CzS2QGkhl/T91TvTfPM74yIvfNg6H7R7vJYJcXwFGmjrVWsvdfCDsAjEbF72XGZ1ShdHGJERPylXflRwM+K7rfNdX049xvvRrrKzy151eOqMhcQSdqKtFfMw7VFQpLeHOVsi1wfxzDSPkrnFlpPL0rmR5GvcB0R/5Q0BfhVlLdfdKOY3hh115nMU8EeKfJb38yskV4xAJpdQFoa/M/8+JOUPPkfQNJXJG2SV4OeL6mPpCvz4ZNIl6kyMytVb0rmd0fESXlL0T4AZXe1KO2P3Qq8mXSdx2dyDAMktZAuD3VrmTGZmUHvSua1/qD9SIMopfYPKV0c9hekuaqrWHNZbpAGOM4ro0/SzKy9XjGbRdIedQ8XkLoyRko6h5RIfxcRVxUZQ0T8TelKLneR9owWsLGkdwFvIfXfFxqDmVlHenzLPO9zUH+R5CBNOXqatCx3DnCupDLmeYt0qbQV+edl0qq7l4AdS4rBzGwtPT6ZR8STEfG2dmV/AR6PiF9GxPWk1V6FnmXkPdVvJHWx3JF/VuZ91O8ibTE6rcgYzMw60uOTeb18AYg/5Ier+8wj4rsR8WLjZzVH3mZ3MimZt79SiCLiWmDPfCZhZifai8wAAAICSURBVFaq3pTM9yDtvPbx/Lil7G6NvB/0/ye1ymcDm+bFEMvyr8wi9Z+bmZWqNy0aEqy+1iOS9ujuZf2S3hARD9Y93igiSru+oJlZTa9J5mZm1rHe1M1iZmYdcDI3M6sAJ3MzswpwMjczq4BesZzfrDPyVW0uBbYjXeHlfcB3gG1JF/M9jrRi+DHS3jpbA98HxpH23BlG2tDtY5I2IV18dxDwYEQcJ2ka6aIC++bfPxg4Hrg/X3lqGvBARBR2fUmzjrhlblUyFfh9ROxDutLNJ4D78uW5/kzaoG1j4HBgV+Ao0u6XAFdGxN7A9nkvoG2Ab5IuSDwiX2AA0rUdJwBXA/sDP8yvA+kKO9cW+080a8zJ3KpkJ9LWCpBa6NuQFnkBzCVdmf2f+dJpD5NW89YWntXWLNwDjCBd8uzDwGXAYGBgPv7DfPsI0D8iHiItHptI+uJ4rtn/KLPOcDK3KnmAtNc8wGfy43H58TjatoJoZM98OwZ4CPgQqZvlSODZut97lrX9FLiEtkRvVjonc6uSi4Gxkm4DxpJa56Ml3U66oPCl63juIZLuJPV5LwBuJl0U+JZ8/LXreO6VpL2C7uhK8GZd4RWg9qon6VJgWkQs2oDnjiYNon43ImY0OTSzTnMyNzOrAHezmJlVgJO5mVkFOJmbmVWAk7mZWQU4mZuZVcD/AkTZ+h3uqeSNAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib as mpl\n",
    "%matplotlib inline\n",
    "\n",
    "# 设置字体支持\n",
    "mpl.rcParams[\"font.family\"] = \"SimHei\"\n",
    "mpl.rcParams[\"axes.unicode_minus\"]=False\n",
    "\n",
    "result.head(10).plot(kind = 'bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD2CAYAAAA6eVf+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXhV5bn+8e9D5oEkBMIMCQlDGAMSEAQkIKhYR7RgVawjojjV01at9NfqoYO2ogWLFad68IhSW604IJMRVBCCzGMChClAAoGEQCDT8/uDTeVgEpKdYe3h+VxXru6svdbe9+5V7r5Ze633FVXFGGOMf2jidABjjDGNx0rfGGP8iJW+Mcb4ESt9Y4zxI1b6xhjjRwKdDlCdFi1aaEJCgtMxjDHGq6xevfqwqsZV9pxHl35CQgIZGRlOxzDGGK8iIrures5O7xhjjB+x0jfGGD9ipW+MMX7ESt8YY/yIlb4xxvgRK31jjPEjbpe+iLQSkWUX2Od1EVkuIlOq22aMMaZxuHWdvog0A94CIqrZZywQoKqDReQNEekC9D5/m6pmupXc1EnR6TI25xSycX8BxaXlRIUFEe36iQoNJDosiFZRoUSEePStHMaYWnL3X3Q5MB74dzX7pAFzXY8XAEOBfpVs+z+lLyITgYkAHTt2dDOeOZeq8t2eY2Rk57Mxp5BN+wvYdeQEF1pKIShAGNq5BWN6tWF0j1Y0iwhunMDGmAbjVumraiGAiFS3WwSw3/U4H7ioim3nv/YsYBZAamqqrfBSB6XlFXy64QCvLtvJxv2FALSLCaNn2yiu79eOXu2i6NU2mqiwIAqLSyk8VUpBcSmFxWUUFJeyKaeAzzYe5It/rifgA2FQYixX9mrDFT1b0bJpqMOfzhjjjob8270ICHM9juTM9weVbTP1rOh0Ge+u3MObX2ez/1gxiXER/P6G3lzZqzWxVYzWQ4MCaBn1f4v8+n7t+NVV3dmUU8inGw4wf+NBfv3hRp6Zt4k7Lkngocu6EBUa1BgfyRhTTxqy9Fdz5vTNCiAF2Absq2SbqSdFp8v46xdZvL1iN8dPlTGwUyxPX9uTkcktadKk2r/KqiQi9GoXTa920fziim5sP1TEG1/t4rWvdvHBmv384opu/Lh/B7df3xjTuKQua+SKSLqqpolID+AWVT33Kp0oYBmwGBgDDAL0/G2qWlDV66empqpNuFYzq3cf5WfvrWXv0ZNc1asN916aSN8OMQ32fhv2FfDbeZtYvfsovdtF85trepCaENtg72eMqTkRWa2qqZU+15ALo7uu8hkNLFXVg1Vtq4qV/oWVllcwY0kWLy3JpG1MGC+O79to5auqfLQuhz98upWDhae4rm9bfnNNzypPIRljGodjpV9XVvrVyz58gkffW8vavccYe1E7nr62J00dOMd+sqSMl9N38MqXO2kVHcJrtw+gW+umjZ7DGHNGdaVvX6R6IVXl3ZV7uGr6MnYdPsFLt/Rj2ri+jhQ+QHhwIP91eTfmThrM6dIKxs78moWbDzmSxRhTPSt9L1Neofzqg4088a8N9OsYw/xHh3F1n7ZOxwKgb4cYPnpwKEktI5k4O4OZ6Vl48l+SxvgjK30vUlpewaPvrWXOyj08kJbE7Lsupk102IUPbESto0OZe99gru7Tlufmb+Nn763lVGm507GMMS52j72XOFVazuT//Y7FW3N5Ykwyk4YnOR2pSqFBAUy/uS/dWkXy5wXb2XX4BK/envqD+wCMMY3PRvpeoOh0GXe+uYol23KZen0vjy78s0SEB0d24ZUJ/cnMLeInr67gcNFpp2MZ4/es9D3csZMl3Pbat6zMzueFcX25bVC805Fq5Yqerfn7nQPZf6yY219fSUFxqdORjPFrVvoeLPf4KW6etYLNOYW8fOtFXN+vndOR3DKwUyx/u60/mbnHufPNlZw4XeZ0JGP8lpW+hyo6Xcbtr69k
95GTvHHHAC7v2drpSHWS1q0l02/ux9q9x5g4O8O+3DXGIVb6HqisvIIH3/mOzNwiXpnQn6FdWjgdqV6M6d2G525K4eusIzw0Zw2l5RVORzLG71jpexhV5el5m0nflsd/X9eLS7vGOR2pXt3Uvz3PXNeThZsP8fN/rKOiwq7jN6Yx2SWbHuaNr7OZvWI3Ey9N5JaLfXMRmdsHJ1B0uozn5m8jJiyIp6/r5XQkY/yGlb4HWbDpIFM/2cyVPVvzxJXJTsdpUA+kdeboiRJeXbaLnu2iGZfawelIxvgFO73jITbsK+CRd9fSp100L4zv6xfz0z9+ZTJDOjdnyocb2bCvyhm2jTH1yErfA+QcK+but1YRGxHMqz9NJSw4wOlIjSIwoAnTb+5HXGQIk95eTf6JEqcjGePzrPQddqq0nHv/J4OTJeW8cccAv1t7tnlkCC/fdhF5Rad5eM4ayu2LXWMalJW+w/742VY25RTywvi+fjsHfZ/2Mfz3dT35Kuswzy+wFTSNaUhul76IvC4iy0VkShXP3y8i6a6ftSLyiogEisiec7b3dj+691u0+RB//yabOy5JYHSPVk7HcdT4AR35ycCOzEzfwfyN1S6oZoypA7dKX0TGAgGqOhhIFJEu5++jqi+rapqqpnFmXdxXgT7AnLPbVXVDHbJ7tYMFp/jF++vo0SaKJ6/y7St1auq31/YgpUMMP//HOrJyi5yOY4xPcneknwbMdT1eAAytakcRaQe0UtUMziyOfrWIrHT9pfCDS0ZFZKKIZIhIRl5enpvxPFt5hfLIu2s4XVbBjFv6ERLoH1/cXkhIYAAv33oRIYFNuP/t1TZVgzENwN3SjwD2ux7nA9Wdm5gMvOx6vAoYpaoDgSDgqvN3VtVZqpqqqqlxcb51N+pZM7/I4ttd+Tx9bU+S4iKdjuNR2saE8eLNfcnMLeK5+XZ+35j65m7pFwFnl2yKrOp1RKQJMAJId21ar6oHXI8zgB+cFvJ1Gdn5vLg4k2tT2nJT//ZOx/FIw7rEcfvgeN74ehfLdxxxOo4xPsXd0l/N96d0UoDsKvYbBnyr3y+UOltEUkQkALgeWOfm+3ulgpOlPPLuWtrFhPG7G3oh4vs3YLnriTHJJDQP5+f/WEeRTcVsTL1xt/Q/BCaIyDRgHLBJRKZWst8VwNJzfn8GmA2sBZar6iI339/rqCpPfrCeQ4WnmP6TfjQNDXI6kkcLDw7k+XEpHCgoZurHm52OY4zPcGvuHVUtFJE0YDTwnKoepJJRu6r+6rzfN3LmCh6/89G6HD7dcJAnxiTTt0OM03G8Qv/4WCZemsTfvtzB5T1bMTLZvy9rNaY+uH2dvqoeVdW5rsI31cg/UcLT8zaT0iGGe4clOh3Hq/xsdBeSWzfl8X9u4KhN02BMndkduY1g6sebKSwu5dkbexPgBxOp1aeQwACeH5fCsZMl/L+PNjkdxxivZ6XfwNK35fKvNft5IC2J5NZRTsfxSj3bRvPIZV2Yty6Hj9fnOB3HGK9mpd+ATpwu46kPNpIUF8HkkZ2djuPVJg1PIqVDDFM+3Eje8dNOxzHGa1npN6A/fb6NnIJinr2xj911W0eBAU14/scpnDxdzh8+3eJ0HGO8lpV+A/luz1HeWp7NhEHxpCbEOh3HJ3RuGcl9wxP515r9dtOWMW6y0m8AJWUVPPHP9bSJCuWXPr7sYWObPKIzHWLD+PW/N1JSVuF0HGO8jpV+A5iZnsX2Q0X87obeRIbYMsT1KTQogGeu7UVWbhGvfbXT6TjGeB0r/XqWlXucv36RxXV92zIiuaXTcXzSiOSWXNGzFdMXZ7I3/6TTcYzxKlb69UhV+e1HmwkLCuDXV/dwOo5P+801PWkiwtPz7Np9Y2rDSr8eLdh8iK+yDvPY6K60iAxxOo5PaxsTxiOXdWHRllwWbj7kdBxjvIaVfj05VVrO1E8207VVJLcNinc6jl+4a2gnuraK5LcfbeJkic3EaUxNWOnXk9eW7WRv
fjG/vaYngQH2X2tjCApowtTre7P/WDEzlmQ5HccYr2DtVA9yjhXz1y92MKZXay7p3MLpOH5lYKdYburfnleX7iTz0HGn4xjj8az068EfPttKhSq/uqq701H80pNjkgkPDmDqJ3anrjEXYqVfRyt35TNvXQ73DU+iQ2y403H8UvPIEB6+rAtfbs8jfVuu03GM8WhW+nVQXqH85qNNtI0O5f7hSU7H8WsTBscT3zyc33+6hbJyu1PXmKq4Xfoi8rqILBeRKVU8Hygie0Qk3fXTuybHeZM5K/ew5UAhT/2oB2HBNqGak0ICA3hyTDLbDxXxXsZep+MY47HcKn0RGQsEqOpgIFFEulSyWx9gjqqmuX421PA4r3DsZAl/XrCNizvFclXv1k7HMcAVPVszMCGWaQu2c/xUqdNxjPFI7o7004C5rscLgKGV7DMIuFpEVrpG94E1OU5EJopIhohk5OXluRmv4f1lcSaFxaX89tqeiNhqWJ5ARJhydXeOnChhZvoOp+MY45HcLf0IYL/rcT5Q2YrVq4BRqjoQCAKuqslxqjpLVVNVNTUuLs7NeA1rz5GTvL1iN+MHdKB7G1sNy5P0aR/D2H7teP2rXTYvjzGVcLf0i4Aw1+PIKl5nvaoecD3OALrU8DiP96cF2whoIjw6qqvTUUwlfn5FN5oIPPf5NqejGONx3C3d1Xx/aiYFyK5kn9kikiIiAcD1wLoaHufR1u87xrx1OdwzNJFWUaFOxzGVaBsTxsRhicxbl8Pq3UedjmOMR3G39D8EJojINGAcsElEpp63zzPAbGAtsFxVF1Vy3Cduvr8jVJU/fLqV2Ihg7hue6HQcU437hicR1zSEqZ9sRlWdjmOMx3Cr9FW1kDNfyq4ARqjqOlWdct4+G1W1j6r2VtWnqjiuoC7hG1v69jyW7zzCwyM70zQ0yOk4phoRIYH84vJurNlzjHnrD1z4AGP8hNvn1FX1qKrOVdWDjXGc08orlGc/20p883Buudhm0fQGN/ZvT3Lrpvz58222tKIxLl75RaoTPlizn60Hj/OLK7oRHGj/tXmDgCbC41cmsyf/pN2wZYyLtVcNnCotZ9qCbaS0j+ZHvds4HcfUQlq3OAYkNGP64kyKS8qdjmOM46z0a+Dv32STU3CKJ8Z0txuxvIyI8Msrk8k7fpo3v9nldBxjHGelfwHHTpYw84ssRia3ZHBSc6fjGDcMSIhlZHJL/pa+g4KTNj2D8W9W+hcwM30HRafLePzKZKejmDr4+eXdKDxVxitLbXoG49+s9KtxsOAUb32TzQ392tOtdVOn45g66NE2iuv6tuWNr3eRW3jK6TjGOMZKvxp//SKL8grl0VFeOxmoOcdjo7tSVq62nq7xa1b6Vdibf5J3V+1h/IAOtiKWj4hvHsH4AR2Ys3IPu4+ccDqOMY6w0q/C9MWZiAgPjbRRvi95+LIuBAYILyzc7nQUYxxhpV+JnXlF/PO7fUwYFE/raJtUzZe0igrljks68e91OWw5UOh0HGManZV+JV5clElIYAD3p9m6t77o/uFJNA0J5M829bLxQ1b659l6sJB563O4c0gCLSJDnI5jGkB0eBD3DU9i8dZc1u495nQcYxqVlf55pi3YTmRwIBMvtamTfdlPL0mgWXgQLy6yc/vGv1jpn2P9vmMs2HyIe4YlEhMe7HQc04AiQwKZeGkS6dvybKEV41es9M/x/ILtNAsP4q6hCU5HMY3g9sHxNI8IttG+8Stul76IvC4iy0VkShXPR4vIZyKyQEQ+EJFgEQkUkT0iku766e1+9Pq1KjufL7fnMWl4ki2Q4iciQgKZNDyJZZmHWZWd73QcYxqFW6UvImOBAFUdDCSKSGUXs98KTFPVy4GDwJVAH2COqqa5fja4G7y+/fnzbbSIDOH2wQlORzGN6LZB8bSIDLHr9o3fcHeknwbMdT1ewPeLnf+Hqs5U1YWuX+OAXGAQcLWIrHT9pRB4/nEiMlFEMkQkIy8vz814tbN8
xxG+3ZXPA2lJhAUHNMp7Gs8QFnzm0txvdhxh+Y4jTscxpsG5W/oRwH7X43ygVVU7ishgoJmqrgBWAaNUdSAQBFx1/v6qOktVU1U1NS4uzs14tfPiou20bBrCLRd3bJT3M57l1os70rJpCC8s2m6LqBuf527pFwFhrseRVb2OiMQCM4C7XJvWq+rZVaozAMfnODg7yr8/LYnQIBvl+6PQoAAmj+jMyl35Nto3Ps/d0l/N96d0UoDs83cQkWDgH8CTqrrbtXm2iKSISABwPbDOzfevN2dH+T8ZaKN8fzZ+QAfaRIcybaGN9o1vc7f0PwQmiMg0YBywSUSmnrfP3cBFwFOuK3XGA88As4G1wHJVXeTm+9cLG+Wbs0KDAnhgRGcydh9lWeZhp+MY02DE3VGNiDQDRgNLVfVgvaZySU1N1YyMjIZ4aQDGv7KcXYdPsPSXI6z0DafLyhn55y+JaxrCBw9cYushG68lIqtVNbWy59y+Tl9Vj6rq3IYq/IZmo3xzvpDAM+f21+49xlIb7Rsf5bd35P5lsZ3LNz90U//2tIsJ4y92JY/xUX5Z+st3HGHFThvlmx8KDmzC/WlJfLfnGF9n2ZU8xvf4ZenbKN9U58ep7WkTHcpfFtto3/gevyt9G+WbCzm7gM6q7KMs32mjfeNb/K70bZRvamJcagdaRYXwl0WZTkcxpl75Vel/u/PMKH/ScBvlm+qFBgUwaXgS3+7KZ4WN9o0P8avSn7EkixaRNseOqZmfDOxIXNMQpi+20b7xHX5T+qt35/NV1mHuuzTRRvmmRkKDArjv0kS+2XHE5ts3PsNvSn/64ixiI4K5dZCN8k3N3XpxPC0ig220b3yGX5T+2r3H+HJ7HvcOSyQ8+AdT+BtTpbDgACZemsiyzMO2lq7xCX5R+i8tySQmPIgJg+OdjmK80G2D4omNsNG+8Q0+X/ob9xewaEsudw/pRGSIjfJN7YUHB3LvsES+3J7H2r3HnI5jTJ34fOnPWJJJ09BAfjokwekoxotNGBxPTHgQM2y0b7ycT5f+lgOFfL7pEHcN6URUaJDTcYwXiwwJ5O4hnVi8NZeN+wucjmOM23y69F/6IovIkEDuGtLJ6SjGB/x0SAJNQwOZscRG+8Z7+WzpZx46zqcbDvDTS+KJDrdRvqm7qNAg7hzSic83HWLrwUKn4xjjFrdLX0ReF5HlIjKlNvvU5Lj68NIXWYQFBXD30MSGfBvjZ+4akkBkSCAzlmQ5HcUYt7hV+iIyFghQ1cFAooh0qck+NTmuPuzMK2LeuhwmuC61M6a+xIQHc/vgeD7dcICs3ONOxzE+SFV58J3v+GhdToO8vrsj/TRgruvxAmBoDfe54HEiMlFEMkQkIy8vz61wu4+cpE10GPcMs1G+qX/3DEskLCiAl2y0bxrA11lH+Hj9AQqKSxvk9d0t/Qhgv+txPtCqhvtc8DhVnaWqqaqaGhcX51a4EcktWfrLEcQ1DXHreGOqExsRzG2D4vloXQ67Dp9wOo7xMdMXZ9I6KpRxqe0b5PXdLf0iIMz1OLKK16lsn5ocVy8CmkhDvbQx3DOsE0EBTfjrFzbaN/Vnxc4jrMzOZ9LwREICG2ZiSHdLdzXfn5pJAbJruE9NjjPG47VsGsotF3fkgzX72Zt/0uk4xkdMX5xJi8gQbm7ARZ7cLf0PgQkiMg0YB2wSkakX2OeTKrYZ45XuuzSJABFmptto39RdRnY+3+w4wqThDTv9u1ulr6qFnPlSdgUwQlXXqeqUC+xTUNk296Mb46zW0aGMH9CB91fvY/+xYqfjGC83fUkWzSOCG3yRJ7fPqavqUVWdq6oHa7NPTY4zxltMSksC4GUb7Zs6WLPnKEu353HvpQ0//bvP3pFrTGNoFxPGTf07MHfVPg4U2GjfuGfGkixiwoO4bVDDT/9upW9MHT2QlkSFKq98udPpKMYLbdhXwJKtudwztHGmf7fSN6aOOsSGc+NF7Xln5R4OFZ5yOo7xMjOWZBIV
GsjtlyQ0yvtZ6RtTDyaP6Ex5hY32Te1szilkweZD3DW08aZ/t9I3ph50bB7ODf3a8b/f7ib3uI32Tc289EUmkSGB3HlJ403/bqVvTD2ZPKIzpeUVzLLRvqmB7YeO89nGg9xxSUKjTv9upW9MPenUIoLr+7bj7W93c7jotNNxjIebvjiT8KAA7h7auIs8WekbU48mj+xMSVkFry610b6p2vZDx/lkwwHuGJJAs0ae/t1K35h6lBQXyTUpbfmf5bs5YqN9U4Wzo/x7HFjkyUrfmHr20MjOnCor57WvdjkdxXigs6P8n17S+KN8sNI3pt51btmUH/Vuw/98k83REyVOxzEe5j+jfIcWebLSN6YBPDSyCydKynndRvvmHJnnjPKdWsrVSt+YBtCt9ZnR/ptf77LRvvmP6UuyHB3lg5W+MQ3mkVFdOFlazqxldiWPOTPK/3h9jqOjfLDSN6bBdG3VlKv7tOWtb7LtSh7jEaN8sNI3pkE9clkXTpWWM8uu2/drZ0f5tzs8ygc3Sl9EXheR5SIypZp9okXkMxFZICIfiEiwiASKyB4RSXf99K5bdGM8X+eWkVzXtx1vLc8m77iN9v3VjCVZhAUFcK/Do3yoZemLyFggQFUHA4ki0qWKXW8Fpqnq5cBB4EqgDzBHVdNcPxvqEtwYb/GQ6y7dV77c4XQU44Cs3OPM84Bz+WfVdqSfBsx1PV4ADK1sJ1WdqaoLXb/GAbnAIOBqEVnp+muh0tUCRGSiiGSISEZeXl4t4xnjeRLjIrmhX3tmr9hNrs2373deWHTmunxPGOXDBUpfRF4553RMOvAQsN/1dD7Q6gLHDwaaqeoKYBUwSlUHAkHAVZUdo6qzVDVVVVPj4uJq92mM8VAPX9aZsgplZrqN9v3J5pxCPll/gLuGdvKIUT5AtWtzqep95/4uIn8Bwly/RlLN/2mISCwwA7jRtWm9qp49qZkBVHVqyBifE988ghsvasc7K/cwaXgSraNDnY5kGsG0hduICg10/Iqdc9X29M5qvj+lkwJkV7aTiAQD/wCeVNXdrs2zRSRFRAKA64F1tY9rjPd6aGQXKiqUmelZTkcxjWDNnqMs2pLLfcOTiA5rvPnyL6S2pf8hMEFEpgHjgE9EpIeITD1vv7uBi4CnXKeGxgPPALOBtcByVV1Ux+zGeJUOseH8OLUD767cy/5jxU7HMQ1s2sLtxEYEc0cjrX1bU7UqfVUt5MyXuSuAEapaoKqbVXXKefu9rKrNzrlS5z1V3aiqfVS1t6o+VX8fwRjv8eDIzijKX7+w0b4vW7HzCMsyD/NAWhIRIdWeRW90tb5OX1WPqupcVT3YEIGM8WXtYsK4eUBH5q7ay+4jJ5yOYxqAqvL8gm20igrhtkHxTsf5Absj15hG9tDIzgQGCC8s3O50FNMAlmYeZlX2UR4c2YXQoACn4/yAlb4xjaxlVCh3DunEv9flsOVAodNxTD06O8pvFxPG+NQOTseplJW+MQ6YdGkSTUMC+fPn25yOYurRgs2HWL+vgEdGdSE40DPr1TNTGePjosODmJSWxOKtuWRk5zsdx9SDigpl2oLtJLaIYGy/dk7HqZKVvjEOufOSTsQ1DeHZ+VtRVafjmDr6eMMBth06zqOjuxIY4LnV6rnJjPFxYcEBPHxZF1ZlHyV9m80z5c1Kyip4fsE2kls35erebZyOUy0rfWMcND61Ax1jw3nu821UVNho31u98+1udh85yeNjkmnSRJyOUy0rfWMcFBzYhMdGd2XLgUI+3nDA6TjGDcdPlTJ9SRaDE5uT1tXzJ4m00jfGYdemtCW5dVOeX7CN0vIKp+OYWpq1dCf5J0p48qpkRDx7lA9W+sY4rkkT4RdXdGP3kZPMzdjrdBxTC4cKT/Hasl1ck9KWPu1jnI5TI1b6xniAkcktSY1vxl8WZVJcUu50HFNDLy7aTllFBb+4vJvTUWrMSt8YDyAiPDEmmdzjp3l1mS2i7g2yco/z3qq93HpxPB2b
hzsdp8as9I3xEKkJsVzVuzUvp+/gkC2r6PGenb+N8OBAHhrZ2ekotWKlb4wHefzKZMorzszfYjzXqux8Fm4+xP1pSTSPDHE6Tq1Y6RvjQeKbR3DHkAT+sXofm3IKnI5jKqGq/P7TLbSKCuGuIZ2cjlNrtS59EXldRJaLyJRq9gkUkT3nLKreu6bHGuPvJo/oTExYEL/7ZItNz+CBPt90kDV7jvGzUV0JC/a8qZMvpFalLyJjgQBVHQwkikhVi5v3Aeacs3LWhloca4xfiw4L4meju/LNjiMs3pLrdBxzjtNl5Tw7fxudW0ZyU//2TsdxS21H+mnAXNfjBXy/SPr5BgFXi8hK1+g+sBbHGuP3fjKwI0lxEfz+0y12w5YHefPrbHYdPsGUH3X36EnVqlNtahF55ZxTNOnAQ8B+19P5QKsqDl0FjFLVgUAQcBUQUZNjRWSiiGSISEZenk1CZfxTUEATnvpRd3YePsH/rtjtdBwD5BaeYsbiTEZ1b0lat5ZOx3FbtaWvqvedc4omDZgOhLmejqzm+PWqenYikQygC1BUk2NVdZaqpqpqalyc589jYUxDGdGtJUM7t+DFxZkUnCx1Oo7f++P8rZSWK1N+1MPpKHVS279PVvP9aZkUILuK/WaLSIqIBADXA+tqcawxhjM3bD31o+4UFJcyY0mm03H82nd7jvKv7/Zzz7BOJLSIcDpOnQTWcv8PgWUi0hYYAwwSkR7ALap67hU5zwDvAAJ8pKqLRCTq/GPrHt8Y39a9TRTjUzvw1vJsbh0UTycvLxxvVFGh/PajTbSKCmHyCO+6EasytRrpq2ohZ76QXQGMUNUCVd18XuGjqhtVtY+q9lbVp6o6tj4+gDG+7rHLuxIaGMBvPtpkl3A64P3V+1i/r4Anx3QnIqS242TPU+uvn1X1qKrOVdWDjXmsMf6qZdNQHru8K0u35/HpBvun05gKikt5dv5W+sc347q+bZ2OUy+885ojY/zMhEHx9GwbxTMfb6LodJnTcfzG9MWZ5J8s4elre3rFXPk1YaVvjBcIDGjC1Ot7kXv8NC8s3O50HL+QlXuct77J5uYBHejVLtrpOPXGSt8YL9GvYzN+MrAjf/8mm805hU7H8WmqytPzNhMWHMDPvUVI6vQAAAzwSURBVGiu/Jqw0jfGizx+RTIxYUFM+XCDLaTegD5al8OyzMM8Nrqr182ieSFW+sZ4kejwIJ68qjvf7TlmSys2kPwTJTw9bzMpHWK4fXCC03HqnZW+MV7mxovaMTAhlj/O30r+iRKn4/ic//54M4XFpTx7Y28CmvjGl7fnstI3xsuICFNv6EXRqTL++NkWp+P4lPRtuXywZj8PpCWR3DrK6TgNwkrfGC/UtVVT7h7WibkZ+1iVne90HJ9w4nQZT32wkc4tI5nsZUsg1oaVvjFe6pHLutAuJoxfvr+ekyV27X5d/enzbeQUFPPsjb0JCfS+xVFqykrfGC8VHhzIn37ch12HT/DcfFtTty5W7z7KW8uzmTAonv7xsU7HaVBW+sZ4sUuSWnDHJQn8/Ztsvs467HQcr3S6rJwn/rmeNlGh/PLKZKfjNDgrfWO83ONXJpPYIoJfvr+ewlM2735tvZy+g8zcIn53Q28ifWBCtQux0jfGy4UFB/D8uBQOFBTz3/M2Ox3Hq2w5UMhfv8jiur5tGZHsvath1YaVvjE+oF/HZtyflsQ/Vu9j0eZDTsfxCidLynhozhpiwoP5f1d792pYtWGlb4yPeOSyrnRvE8UT/9pgN23VwDPzNrMjr4gXx/f1uakWqmOlb4yPCA5swrRxKRQUl/DrDzfagivVmLcuh3dX7WXS8CSGdG7hdJxGVevSF5HXRWS5iEypZp/7RSTd9bNWRF4RkUAR2XPO9t51i26MOV/3NlH8bHRXPtlwgI/W5TgdxyPtzT/Jr/61gX4dY3hsdFen4zS6WpW+iIwFAlR1MJAoIl0q209VX1bVNFVNA5YBrwJ9gDlnt6vqhjpm
N8ZU4r5Lk+gf34ynPthIVm6R03E8Sml5BQ+/uwaA6Tf3IyjA/0521PYTpwFzXY8XAEOr21lE2gGtVDWDMwuhXy0iK11/LVR6bZSITBSRDBHJyMvLq2U8Y0xAE+GlW/oREtiESW+vtpW2zvHCwu2s2XOM34/tTYfYcKfjOKLa0nedljl7OiYdeAjY73o6H2h1gdefDLzserwKGKWqA4Eg4KrKDlDVWaqaqqqpcXFxNfwYxphztYkOY8Yt/diZV8Qv319n5/eBr7MO8/KXOxif2oFrUnxjvVt3VFv6qnrfOadj0oDpQJjr6cjqjheRJsAIIN21ab2qHnA9zgAqPTVkjKkflyS14IkxyXy64SCzlu50Oo6jjhSd5tH31pLYIoLfXOs/l2dWprand1bz/SmdFCC7mn2HAd/q90OM2SKSIiIBwPXAulq+tzGmlu4dlsiPerfh2flb+cZPp2k4XVbOpLdXU1BcyoyfXER4sO/fdVud2pb+h8AEEZkGjAM+EZEeIjK1kn2vAJae8/szwGxgLbBcVRe5E9gYU3MiwrM39SEpLpIH56wh51ix05EalaryxD83sCr7KH+6qQ892vrmHPm1IbU91ycizYDRwFJVPdggqVxSU1M1IyOjId/CGL+wI6+I6176mqS4CN67bzChQb47dfC5Xly0nRcXZfJfo7vy0GX+c0ZZRFaramplz9X6eiVVPaqqcxu68I0x9ScpLpLnx6Wwbl8Bv/n3Jr/4YveDNft4cVEmN17Ungd9eFGU2vK/i1SN8VNX9GzNgyM6817GXqYt3O50nAa1clc+j7+/gUGJsfxhbG9EfG+tW3f59zcaxviZ/7q8K4eLTjNjSRYRIYFMGp7kdKR6t+vwCSbOzqB9szD+dlt/ggNtbHsuK31j/IiI8LsbenOipJw/fraViOAAJgxOcDpWvTl6ooS7/r6KJiK8eecAYsKDnY7kcaz0jfEzAU2EaeNSKC4p49f/3kRESCBjL2rvdKw6O3ayhDv+vor9R4t5596LiW8e4XQkj2R/9xjjh4ICmvDSLRdxSVJzfvH+euZv9O7rMnKPn2L8KyvYcqCQmbdeRGqCb69zWxdW+sb4qdCgAF69PZU+7aN5eM4alm73zrmu9h09ybi/LWfv0ZO8eccARvW40Oww/s1K3xg/FhESyN/vGEhiXAQTZ2d43Yh/R14RP/7bcvJPlDD77ov9bm58d1jpG+PnosODePuei0luHcWkt1czY3GmV1zHvzmnkHF/W05JWQVzJg6if3wzpyN5BSt9YwwtIkN4d+IgxvZrx/MLt/Pwu2spLil3OlaVVu8+ys2zlhMc2IS5kwbTs22005G8hl29Y4wBzpzjf35cCl1bN+XZ+VvJPnyCV29PpXV0qNPR/qOiQnnj6108N38bbWNCefuei2nfzD/nxXeXjfSNMf8hIkwansRrt6eyM6+Ia176ijV7jjodC4ADBcXc9vq3TP1kC8O7xfGvB4ZY4bvBSt8Y8wOXdW/FB5OHEBYUwPhZK5iZnsXpMudO93yy/gBXvriMtXuP8eyNvZk1oT+xEXbjlTus9I0xleraqin/njyEtK5xPDd/G5e/sJQFmw426pe8x0+V8tjctUx+5zsSWkTwycPDGD+go82lUwe1nlq5MdnUysZ4hmWZeTwzbzOZuUUM69KC/3d1D7q0atpg73eypIz3V+/jlS93cqCgmIdGduHBkZ39ciFzd1Q3tbKVvjGmRkrLK3h7xW5eWLidEyXlTBgUzwMjkmjZtP6+6M07fprZy7P5nxW7OXaylL4dYvj11T3scsxastI3xtSb/BMlPL9gG3NW7kGBAQmxjOnVmit7taZNdNgFj69MVm4Rr3+1k39+t5/S8gpGd2/FxEsT6R/fzE7luKFeS19EWgHvq+qwC+z3OtAD+ERVp1a1rTpW+sZ4rh15Rcxbl8P8jQfZevA4AP06xjCmV2sGJMQSHRZEdFgQUWFB/+e0TG7hKTbmFLBxfyEb9xew
KaeQ/ceKCQlswo3923PP0E4kxkU69bF8QnWlX6vr9F1LJb4FVDt9nYiMBQJUdbCIvCEiXYDe529T1czavL8xxnMkxUXy6KiuPDqqKzvzivhs40HmbzzI7z/d+oN9w4MDiA4LorRcOVx0GgAR6NQigv7xzbhzSALX92tHi8iQxv4YfqdWI30RiQIE+LeqplWz33Rgvqp+KiI3A2FAv/O3qeqblRw7EZgI0LFjx/67d++uzecxxjhsb/5JMnOPU1hcRkFxKQXFpRS6/hOge5soerWLpkfbKCJD7P7QhuD2SF9EXgG6nbNpiao+U4NzbBHAftfjfOCiKrb9gKrOAmbBmdM7F3ojY4xn6RAbTodYu2nKU1Vb+qp6n5uvW8SZ0T1AJGfuB6hsmzHGmEbUUMW7GhjqepwCZFexzRhjTCOq8wk1EekB3KKqU87Z/CGwTETaAmOAQYBWss0YY0wjcmukf+6XuKq6+bzCR1ULgTRgBTBCVQsq2+ZmZmOMMW5qsK/OVfUoMPdC24wxxjQe+zLVGGP8iJW+Mcb4ESt9Y4zxIx494ZqI5AHu3pLbAjhcj3Gc4O2fwdvzg/d/Bm/PD97/GZzIH6+qcZU94dGlXxciklHVbcjewts/g7fnB+//DN6eH7z/M3hafju9Y4wxfsRK3xhj/Igvl/4spwPUA2//DN6eH7z/M3h7fvD+z+BR+X32nL4xxpgf8uWRvjHGmPNY6RtTBRGJFZHRItLC6SzG1BefLH0ReV1ElovIlAvv7ZlEpJWILHM6hztEJFpEPhORBSLygYgEO52ptlxLg34MDAS+EJFKr3n2dK7/Ha1xOkdtiUigiOwRkXTXT2+nM7lLRGaKyDVO5zjL50r/3PV5gUTX+rxepaZrEXuwW4Fpqno5cBC40uE87ugDPKaqvwM+p4qV3rzAn/l+8SJv0geYo6pprp8NTgdyh4gMA1qr6jyns5zlc6XPmembz87kuYDvF27xJuXAeKDQ6SDuUNWZqrrQ9WsckOtkHneo6pequkJELuXMaH+505lqS0RGAic483+83mYQcLWIrHT95e51i+mKSBDwKpAtItc5necsXyz989fibeVgFreoaqEvrDcgIoOBZqq6wuks7pAzi0GPB44CpQ7HqRXXKbVfA084ncVNq4BRqjoQCAKucjiPO24HNgPPAQNF5CGH8wC+Wfq2Fq8HEJFYYAZwl9NZ3KVnTAbWA9c6naeWngBmquoxp4O4ab2qHnA9zgC87jQt0A+YpaoHgbeBEQ7nAXyzEG0tXoe5Rpn/AJ5UVXcnzHOUiDwuIre7fo0BvK08RwGTRSQd6Csirzmcp7Zmi0iKiAQA1wPrnA7khiwg0fU4Ffcnj6xXPndzlohEAcuAxbjW4vXWUyUikn7u0pTeQkTuB37P9/9QX1bV9xyMVGuuL9PnAiHARmCyeuk/Fm/835GI9ALeAQT4SFWfcjhSrYlIU+ANzpxiDgJuUtX91R/V8Hyu9OE//2BHA0tdf1oZY4zBR0vfGGNM5XzxnL4xxpgqWOkbY4wfsdI3xhg/YqVvjDF+xErfGGP8yP8HL06+dE5MoYEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Sample sin(x) at 50 evenly spaced points covering [0, 2*pi].\n",
    "x = np.linspace(start=0, stop=2 * np.pi, num=50)\n",
    "y = np.sin(x)\n",
    "\n",
    "# Draw the curve with pyplot and display the resulting figure.\n",
    "plt.plot(x, y)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
